No topics yet. Start the conversation.

Summary

User-supplied summary for the plot

Finance: Crypto prices from yfinance

Description

The description below is supplied as free text by the user
# %%
import pandas as pd, sys, inspect, re, calendar, time
import yfinance as yf
from datetime import date, timedelta
from itertools import islice
import os
import numpy as np
from dotenv import load_dotenv
from supabase import create_client
from novem import Plot
from novem.colors import StaticColor as SC
from novem.table import Selector as S

load_dotenv()

# Per-section configuration for the report.  Each section maps to one novem
# table and carries:
#   title:  display title for the table
#   data:   Yahoo ticker → human-readable row label (dict order = row order)
#   border: row positions used to draw group-separator borders in the table
#           (0-based; -1 = last row) — consumed by the plt.cell.border loop below
MARKET_CONFIG = {
    "commodities": {
        "title": "Commodities",
        "data": {
            # Energy
            'BZ=F': 'Oil Brent ($/bbl)',
            'CL=F': 'Oil WTI ($/bbl)',
            'TTF=F': 'Gas TTF (€/MWh)',

            # Metals
            'GC=F': 'Gold COMEX ($/oz)',
            'SI=F': 'Silver COMEX ($/oz)',
            'PL=F': 'Platinum COMEX ($/oz)',
            'HG=F': 'Copper COMEX ($/lb)',
            'ALI=F': 'Aluminum COMEX ($/t)',

            # Agriculture / Softs
            'CC=F': 'Cocoa ICE ($/t)',
            'KC=F': 'Coffee ICE (¢/lb)',
            'SB=F': 'Sugar ICE (¢/lb)',
            'ZW=F': 'Wheat CBOT (¢/bu)',
            'ZC=F': 'Corn CBOT (¢/bu)',
            'ZS=F': 'Soy CBOT (¢/bu)',
        },
        "border": [0, 3, 8, -1],
    },

    "cryptos": {
        "title": "Cryptocurrencies",
        "data": {
            'BTC-USD': 'Bitcoin (USD)',
            'ETH-USD': 'Ether (USD)',
            'XRP-USD': 'Ripple (USD)',
            'ADA-USD': 'Cardano (USD)',
            'SOL-USD': 'Solana (USD)',
            'LTC-USD': 'Litecoin (USD)',
            'DOT-USD': 'Polkadot (USD)',
            'DOGE-USD': 'Dogecoin (USD)'
        },
        "border": [0, 2, -1],
    },

    "eq_markets": {
       "title": "Equity Markets",
       "data": {
            # US
            '^GSPC': 'S&P 500 (USD)',
            '^IXIC': 'Nasdaq (USD)',
            '^RUT': 'Russell 2000 (USD)',

            # America other
            '^GSPTSE': 'TSX (CAD)',
            '^MXX': 'IPC (MXN)',
            '^BVSP': 'Bovespa (BRL)',

            # Eurozone
            '^GDAXI': 'DAX (DE, EUR)',
            '^FCHI': 'CAC 40 (FR, EUR)',
            '^IBEX': 'IBEX 35 (ES, EUR)',

            # Europe other
            '^FTSE': 'FTSE 100 (GBP)',
            '^SSMI': 'SMI (CHF)',
            'OBX.OL': 'OBX 25 (NOK)',
            '^OMX': 'OMX 30 (SEK)',
            '^OMXC25': 'OMX 25 (DKK)',

            # Asia-Pacific
            '^N225': 'Nikkei 225 (JPY)',
            '^HSI': 'Hang Seng (HKD)',
            '000001.SS': 'Shanghai Comp (CNY)',
            '^BSESN': 'BSE Sensex (INR)',
            '^AORD': 'ASX 200 (AUD)'
        },
        "border": [0, 3, 6, 9, 14, -1],
    },
}

# Return horizons: column label → lookback in trading days (252 ≈ 1 year).
HORIZONS = {'1d': 1, '1w': 5, '1m': 22, '1y': 252}
SHORT_HORIZONS = {'1d': 1, '1w': 5}  # use specific contract for these
# Futures delivery-month letter codes (standard exchange convention) mapped
# to month abbreviations (for display) and month numbers (for date math).
FUTURES_MONTH = {'F': 'Jan', 'G': 'Feb', 'H': 'Mar', 'J': 'Apr', 'K': 'May',
                 'M': 'Jun', 'N': 'Jul', 'Q': 'Aug', 'U': 'Sep', 'V': 'Oct',
                 'X': 'Nov', 'Z': 'Dec'}
MONTH_CODE = {'F':1,'G':2,'H':3,'J':4,'K':5,'M':6,'N':7,'Q':8,'U':9,'V':10,'X':11,'Z':12}
MONTH_TO_CODE = {v: k for k, v in MONTH_CODE.items()}  # month number → letter code
# Base URL for the per-ticker link column in the rendered table.
YAHOO_URL = 'https://finance.yahoo.com/quote/'

# Deterministic roll calendar — aligned with Bloomberg/Refinitiv convention
# (roll at expiry / last trading day, matching FT, DN.no, etc.).
# Consumed by get_front_month() to pick the specific contract symbol.
# base:        Yahoo contract-symbol prefix (e.g. 'CL' → 'CLM26.NYM').
# exchange:    Yahoo exchange suffix for the contract symbol.
# roll_offset: months relative to delivery month where the expiry falls.
# roll_day: approximate last trading day (calendar day) in that month.
#           Contract is shown through this day; switches the day after.
# months: active delivery months for the commodity.
ROLL_CONFIG = {
    # Energy — monthly contracts
    'BZ=F':  {'base':'BZ', 'exchange':'NYM','months':list(range(1,13)),'roll_offset':-2,'roll_day':31},  # last BD of 2mo before delivery
    'CL=F':  {'base':'CL', 'exchange':'NYM','months':list(range(1,13)),'roll_offset':-1,'roll_day':20},  # ~3 BD before 25th of prior month
    'TTF=F': {'base':'TTF','exchange':'NYM','months':list(range(1,13)),'roll_offset':-1,'roll_day':31},  # last BD of month before delivery
    # Precious metals — 3rd-to-last BD of delivery month ≈ 27th
    'GC=F':  {'base':'GC', 'exchange':'CMX','months':[2,4,6,8,10,12],  'roll_offset':0,'roll_day':27},
    'SI=F':  {'base':'SI', 'exchange':'CMX','months':[1,3,5,7,9,12],   'roll_offset':0,'roll_day':27},
    'PL=F':  {'base':'PL', 'exchange':'NYM','months':[1,4,7,10],       'roll_offset':0,'roll_day':27},
    # Base metals — 3rd-to-last BD of delivery month ≈ 27th
    'HG=F':  {'base':'HG', 'exchange':'CMX','months':[3,5,7,9,12],     'roll_offset':0,'roll_day':27},
    'ALI=F': {'base':'ALI','exchange':'CMX','months':list(range(1,13)), 'roll_offset':0,'roll_day':27},
    # Softs — ICE
    'CC=F':  {'base':'CC', 'exchange':'NYB','months':[3,5,7,9,12],     'roll_offset':-1,'roll_day':15},  # ~10 BD before 1st BD of delivery
    'KC=F':  {'base':'KC', 'exchange':'NYB','months':[3,5,7,9,12],     'roll_offset':-1,'roll_day':20},  # ~8 BD before last BD of delivery mo
    'SB=F':  {'base':'SB', 'exchange':'NYB','months':[3,5,7,10],       'roll_offset':-1,'roll_day':31},  # last trading day of prior month
    # Grains — CBOT: BD before 15th of delivery month ≈ 14th
    'ZW=F':  {'base':'ZW', 'exchange':'CBT','months':[3,5,7,9,12],     'roll_offset':0,'roll_day':14},
    'ZC=F':  {'base':'ZC', 'exchange':'CBT','months':[3,5,7,9,12],     'roll_offset':0,'roll_day':14},
    'ZS=F':  {'base':'ZS', 'exchange':'CBT','months':[1,3,5,7,8,9,11], 'roll_offset':0,'roll_day':14},
}


def get_front_month(generic_ticker, today=None):
    """Determine the specific Yahoo contract ticker for the current front month.

    A fixed roll calendar (ROLL_CONFIG) is used instead of Yahoo's
    underlyingSymbol, which is unstable near contract expiry.  For each active
    delivery month, the roll date is day ``roll_day`` of the month that lies
    ``roll_offset`` months from delivery.  Walking forward from the current
    month, the first delivery month whose roll date has not yet passed is the
    front month.

    Returns a symbol like ``"CLM26.NYM"``, or ``None`` when the ticker has no
    roll configuration.
    """
    if today is None:
        today = date.today()
    cfg = ROLL_CONFIG.get(generic_ticker)
    if not cfg:
        return None

    # Scan up to 24 calendar months ahead — enough to always hit an active
    # delivery month for every configured commodity.
    for step in range(24):
        months_from_epoch = today.month - 1 + step
        delivery_month = months_from_epoch % 12 + 1
        delivery_year = today.year + months_from_epoch // 12
        if delivery_month not in cfg['months']:
            continue

        # Shift delivery by roll_offset, wrapping the year when we cross it.
        roll_month = delivery_month + cfg['roll_offset']
        roll_year = delivery_year
        if roll_month <= 0:
            roll_month += 12
            roll_year -= 1
        elif roll_month > 12:
            roll_month -= 12
            roll_year += 1

        # Clamp roll_day to the month's actual length (e.g. day 31 in Feb).
        days_in_month = calendar.monthrange(roll_year, roll_month)[1]
        roll_date = date(roll_year, roll_month, min(cfg['roll_day'], days_in_month))

        if today <= roll_date:
            # Still on or before the roll date — this is the front month.
            letter = MONTH_TO_CODE[delivery_month]
            return f"{cfg['base']}{letter}{delivery_year % 100:02d}.{cfg['exchange']}"

    return None

# Shared Supabase client; credentials come from the environment (loaded from
# .env via load_dotenv above).  Raises KeyError if either variable is missing.
sb = create_client(os.environ['SUPABASE_URL'], os.environ['SUPABASE_TOKEN'])


def chunks(iterable, size):
    """Yield successive lists of at most *size* items from *iterable*."""
    iterator = iter(iterable)
    while True:
        batch = list(islice(iterator, size))
        if not batch:
            return
        yield batch


def sb_execute(query, retries=3, delay=5):
    """Execute a Supabase query with retry on transient errors (502, 503, etc.).

    Errors whose message contains a gateway-style marker are retried with
    exponential backoff; any other error — or the final failed attempt — is
    re-raised to the caller.
    """
    transient_markers = ('502', '503', '504', 'Bad Gateway',
                         'JSON could not be generated')
    for attempt in range(1, retries + 1):
        try:
            return query.execute()
        except Exception as e:
            message = str(e)
            retryable = any(marker in message for marker in transient_markers)
            if attempt == retries or not retryable:
                raise
            print(f"  Supabase error (attempt {attempt}/{retries}), retrying in {delay}s: {message[:100]}")
            time.sleep(delay)
            delay *= 2  # exponential backoff


def fetch_and_store(tickers: list, sb, is_crypto: bool = False) -> None:
    """Incremental fetch from Yahoo Finance → Supabase.

    Looks up each ticker's most recent date in Supabase (last 10 days) and
    fetches from max_date+1.  If a ticker has no recent data it is logged
    but does NOT trigger a full 2-year re-download — that only happens when
    no tickers at all have data (first run / new section).

    Uses ON CONFLICT DO NOTHING so duplicate rows are silently skipped.

    Args:
        tickers: Yahoo ticker symbols for one market section.
        sb: Supabase client.
        is_crypto: keep weekend rows (crypto trades continuously) instead of
            dropping Saturday/Sunday bars.
    """
    today = date.today().isoformat()
    end = (date.today() + timedelta(days=1)).isoformat()  # yf.download end is exclusive
    check_since = (date.today() - timedelta(days=10)).isoformat()

    # Recent (ticker, date) rows tell us how far each ticker is already filled.
    result = sb_execute(sb.table("fin_price_close")
                .select("ticker,date")
                .in_("ticker", tickers)
                .gte("date", check_since))

    if result.data:
        df_recent = pd.DataFrame(result.data)
        tickers_with_recent = set(df_recent['ticker'].unique())
        missing = set(tickers) - tickers_with_recent
        if missing:
            print(f"  {len(missing)} ticker(s) without recent data, skipping: {sorted(missing)}")
        # Global watermark: fetch from the day after the newest stored date.
        max_date = date.fromisoformat(df_recent['date'].max())
        start = (max_date + timedelta(days=1)).isoformat()
    else:
        # No data at all — first run, bootstrap with 2 years of history
        print("  No existing data in Supabase — fetching 2y history")
        start = (date.today() - timedelta(days=2*365)).isoformat()

    # Even when the global max is up to date, some tickers may lag behind
    # (e.g. western markets when the job runs after Asian close but before
    # western open).  Identify lagging tickers for a targeted second pass.
    # NOTE: ISO date strings compare lexicographically == chronologically.
    lagging_tickers = []
    if result.data and start >= today:
        per_max = df_recent.groupby('ticker')['date'].max()
        lagging = per_max[per_max < max_date.isoformat()]
        if not lagging.empty:
            lagging_tickers = lagging.index.tolist()
            # Backfill from the day after the most-stale ticker's last date.
            lag_start = (date.fromisoformat(lagging.min()) + timedelta(days=1)).isoformat()
            print(f"  Global max up to date, but {len(lagging_tickers)} ticker(s) lagging — backfilling from {lag_start}")
        else:
            print(f"  Already up to date")
            return
        start = lag_start
        tickers = lagging_tickers

    df_wide = yf.download(tickers, start=start, end=end, auto_adjust=True)['Close']
    if df_wide.empty:
        print("  No new data from Yahoo Finance")
        return

    # Yahoo occasionally drops a daily bar that hourly data still has.
    # Use bdate_range to only check business days (skips weekends; holidays
    # will try hourly, get nothing back, and harmlessly continue).
    # Cap at 5 to avoid hammering Yahoo when the gap is just a long holiday.
    expected_dates = pd.bdate_range(start, df_wide.index.max())
    present_dates = set(df_wide.index.normalize())
    missing_dates = [d for d in expected_dates if d not in present_dates]
    if missing_dates and len(missing_dates) <= 5:
        print(f"  {len(missing_dates)} missing daily bar(s), trying hourly fallback")
        idx_name = df_wide.index.name
        for gap_date in missing_dates:
            gap_start = gap_date.strftime('%Y-%m-%d')
            gap_end = (gap_date + timedelta(days=1)).strftime('%Y-%m-%d')
            hourly = yf.download(tickers, start=gap_start, end=gap_end,
                                 interval='1h', auto_adjust=True)
            if hourly.empty:
                continue
            h_close = hourly['Close']
            # Single-ticker downloads come back as a Series — normalize to frame.
            if isinstance(h_close, pd.Series):
                h_close = h_close.to_frame(name=tickers[0])
            # Use the last hourly bar of the day as a stand-in daily close.
            last_row = h_close.iloc[[-1]].copy()
            last_row.index = pd.DatetimeIndex([gap_date], name=idx_name)
            df_wide = pd.concat([df_wide, last_row]).sort_index()
            print(f"    {gap_start}: filled from hourly ({len(h_close)} bars)")
    elif missing_dates:
        print(f"  {len(missing_dates)} missing daily bar(s), too many to fill — skipping hourly fallback")

    # Sunday evening futures sessions can leak in via end=today+1.
    # Crypto trades 24/7 so we keep weekend rows for that section.
    if not is_crypto:
        df_wide = df_wide[df_wide.index.dayofweek < 5]  # Mon=0 .. Fri=4

    # Wide → long (ticker, date, price) records for the upsert.
    df_long = (df_wide.reset_index()
                      .melt(id_vars=['Date'], var_name='ticker', value_name='price')
                      .dropna(subset=['price']))
    df_long['date'] = pd.to_datetime(df_long['Date']).dt.date.apply(lambda x: x.isoformat())
    df_long = df_long.drop_duplicates(subset=['ticker', 'date'], keep='last')
    records = df_long[['ticker', 'date', 'price']].to_dict(orient='records')

    print(f"  Inserting {len(records)} rows ({start} → {today})")
    # Batch upserts; ignore_duplicates=True maps to ON CONFLICT DO NOTHING.
    for batch in chunks(records, 1000):
        sb_execute(sb.table("fin_price_close").upsert(batch, on_conflict="ticker,date",
                   ignore_duplicates=True))


def load_from_supabase(tickers: list, sb) -> pd.DataFrame:
    """Load last ~400 days of close prices from Supabase, return wide DataFrame.

    Pages through results 1000 rows at a time (Supabase caps single responses),
    then pivots to one column per ticker indexed by ``value_date``.

    Args:
        tickers: ticker symbols to load.
        sb: Supabase client.

    Returns:
        Wide DataFrame (index ``value_date``, one column per ticker), or an
        empty DataFrame when no rows match.
    """
    since = (date.today() - timedelta(days=400)).isoformat()
    rows, page_size = [], 1000
    offset = 0
    while True:
        result = sb_execute(sb.table("fin_price_close")
                    .select("date, ticker, price")
                    .in_("ticker", tickers)
                    .gte("date", since)
                    .order("date")
                    .order("ticker")
                    .range(offset, offset + page_size - 1))
        rows.extend(result.data)
        # A short page means we've reached the end of the result set.
        if len(result.data) < page_size:
            break
        offset += page_size

    # Guard: pd.DataFrame([]) has no 'date' column and the lines below would
    # raise KeyError.  Return an empty frame so callers can handle no-data.
    if not rows:
        return pd.DataFrame()

    df = pd.DataFrame(rows)
    df['date'] = pd.to_datetime(df['date']).dt.date
    # keep='last' so a later duplicate row supersedes an earlier one.
    df = df.drop_duplicates(subset=['ticker', 'date'], keep='last')
    df_wide = df.pivot(index='date', columns='ticker', values='price')
    df_wide.index.name = 'value_date'
    return df_wide


def format_number(x: float) -> str:
    """Format a price for display with magnitude-dependent precision.

    |x| < 100 → 2 decimals; < 1,000 → 1 decimal; < 100,000 → integer with
    thousands separator; < 1M → "###.#k"; otherwise "###.#M".  NaN → "".

    Thresholds compare the absolute value so negative numbers pick the same
    precision as their positive counterparts (previously any negative value
    fell into the 2-decimal branch, e.g. -5000 → "-5,000.00").
    """
    if pd.isna(x):
        return ""
    mag = abs(x)
    if mag < 100:
        return f"{x:,.2f}"
    elif mag < 1000:
        return f"{x:,.1f}"
    elif mag < 100_000:
        return f"{x:,.0f}"
    elif mag < 1_000_000:
        return f"{x/1000:,.1f}k"
    else:
        return f"{x/1_000_000:,.1f}M"


def resolve_front_month_contracts(tickers: list) -> dict:
    """Map generic ``=F`` tickers to their current front-month contract symbol.

    The deterministic roll calendar (get_front_month) is consulted first.
    ``=F`` tickers with no roll configuration fall back to Yahoo's
    underlyingSymbol lookup; results equal to the input ticker (or lookup
    failures) are discarded.  Unresolvable tickers are omitted from the map.
    """
    resolved = {}
    for ticker in tickers:
        contract = get_front_month(ticker)
        if not contract and ticker.endswith('=F'):
            # Best-effort Yahoo fallback — the lookup can raise or come back
            # empty, in which case the ticker simply stays unmapped.
            try:
                contract = yf.Ticker(ticker).info.get('underlyingSymbol')
            except Exception:
                contract = None
            if contract == ticker:
                contract = None
        if contract:
            resolved[ticker] = contract
            print(f"  {ticker} → {resolved[ticker]}")
    return resolved


def fetch_contract_prices(contract_tickers: list) -> pd.DataFrame:
    """Fetch last 14 days of close prices for specific contract tickers.

    Used for short-horizon returns (1d/1w) where the generic =F ticker can
    jump on contract roll dates.  Columns that failed to download (all-NaN)
    are dropped, and rows are trimmed to yesterday since today's bar may be
    incomplete.  Returns an empty DataFrame for an empty ticker list.
    """
    if not contract_tickers:
        return pd.DataFrame()

    today = date.today()
    window_start = (today - timedelta(days=14)).isoformat()
    window_end = (today + timedelta(days=1)).isoformat()  # yf end is exclusive

    prices = yf.download(contract_tickers, start=window_start, end=window_end,
                         auto_adjust=False)['Close']
    # A single ticker comes back as a Series — normalize to a one-column frame.
    if isinstance(prices, pd.Series):
        prices = prices.to_frame(name=contract_tickers[0])

    # All-NaN columns indicate failed downloads; drop them entirely.
    prices = prices.dropna(axis=1, how='all')
    prices.index = prices.index.date
    prices.index.name = 'value_date'

    # Exclude today — only use bars through yesterday.
    cutoff = today - timedelta(days=1)
    return prices.loc[prices.index <= cutoff]


def enrich_and_take_latest(g, ticker, contract_prices=None):
    """Return the newest row of *g* augmented with period returns and z-scores.

    For the short horizons (1d/1w) the specific front-month contract series is
    preferred when enough points are available — the generic =F series can jump
    on roll days.  Longer horizons always use the generic Supabase series.
    Each z-score is the period return divided by daily stdev × sqrt(horizon).

    Args:
        g: long-format frame for one ticker with 'value_date' and 'price'.
        ticker: ticker symbol written into the output row.
        contract_prices: optional Series of front-month contract closes.
    """
    g = g.sort_values('value_date')
    daily_vol = g['price'].pct_change().std()

    if contract_prices is not None:
        contract = contract_prices.dropna().sort_index()
    else:
        contract = pd.Series(dtype=float)

    returns = {}
    scores = {}
    for label, days in HORIZONS.items():
        # Need days+1 contract points to look back `days` bars from the end.
        if label in SHORT_HORIZONS and len(contract) > days:
            returns[label] = contract.iloc[-1] / contract.iloc[-1 - days] - 1
        else:
            returns[label] = g['price'].pct_change(days).iloc[-1]
        scores[f'z_{label}'] = returns[label] / (daily_vol * (days ** 0.5))

    latest = g.iloc[-1:].copy()
    latest['ticker'] = ticker
    return latest.assign(**returns, **scores)


# %%
# Main pipeline: for each market section — refresh Supabase from Yahoo, load
# the price history back, compute returns/z-scores, and publish a novem table.
for section, config in MARKET_CONFIG.items():
    DATA = config['data']
    BORDER = config['border']
    TITLE = config['title']
    tickers = list(DATA.keys())

    print(f"\n=== {TITLE} ===")

    # Step 1: Yahoo Finance → Supabase (incremental)
    fetch_and_store(tickers, sb, is_crypto=(section == 'cryptos'))

    # Step 2: Supabase → wide DataFrame
    df_wide = load_from_supabase(tickers, sb)

    # Use yesterday as cut-off — today's bar may be incomplete / not yet available
    yesterday = date.today() - timedelta(days=1)
    df_wide = df_wide.loc[df_wide.index <= yesterday]

    # Record the last real (non-NaN) date per ticker BEFORE forward-fill so we
    # can trim forward-filled rows later and avoid fake 0% 1d changes.
    actual_last_dates = {
        col: df_wide[col].dropna().index.max()
        for col in df_wide.columns if df_wide[col].notna().any()
    }
    t_latest = max(actual_last_dates.values())

    # Forward-fill across holidays so they show 0% change, not multi-day return
    df_wide = df_wide.ffill()

    # Resolve front-month contracts for short-horizon (1d/1w) returns
    contract_map = resolve_front_month_contracts(tickers)
    contract_syms = list(set(contract_map.values()))
    contract_df = fetch_contract_prices(contract_syms) if contract_syms else pd.DataFrame()

    # Wide → long so each ticker can be processed as its own group.
    df_long = (df_wide.stack()
                      .rename('price')
                      .reset_index()
                      .rename(columns={df_wide.columns.name: 'ticker'}))

    # Compute returns and z-scores per ticker.  Trim each ticker to its actual
    # last date so forward-filled rows don't produce a fake 0% 1d change when
    # other tickers (different timezone / exchange) have newer data.
    enriched = []
    for ticker, group in df_long.groupby('ticker'):
        if ticker in actual_last_dates:
            group = group[group['value_date'] <= actual_last_dates[ticker]]
        cp = None
        if ticker in contract_map and contract_map[ticker] in contract_df.columns:
            cp = contract_df[contract_map[ticker]]
        row = enrich_and_take_latest(group, ticker, cp)
        # Override displayed price with the contract's latest price
        if cp is not None and len(cp.dropna()) > 0:
            row['price'] = cp.dropna().iloc[-1]
        enriched.append(row)
    df_latest = pd.concat(enriched).reset_index(drop=True)

    df_latest['Description'] = df_latest['ticker'].map(DATA)

    # Add contract month indicator for futures tickers
    for i, row in df_latest.iterrows():
        t = row['ticker']
        if t in contract_map:
            sym = contract_map[t]  # e.g. "CLM26.NYM"
            base = sym.split('.')[0]  # e.g. "CLM26"
            # Trailing month letter + 2-digit year, e.g. "M26" → Jun26.
            m = re.search(r'([A-Z])(\d{2})$', base)
            if m and m.group(1) in FUTURES_MONTH:
                month = FUTURES_MONTH[m.group(1)]
                year = m.group(2)
                desc = df_latest.at[i, 'Description']
                # Fold the contract month into the existing "(unit)" suffix
                # when present, otherwise append it.
                if desc.endswith(')'):
                    desc = desc[:-1] + f', {month}{year})'
                else:
                    desc += f' {month}{year}'
                df_latest.at[i, 'Description'] = desc

    # Mark tickers whose last actual price is older than t_latest
    for i, row in df_latest.iterrows():
        t = row['ticker']
        if t in actual_last_dates and actual_last_dates[t] < t_latest:
            # NOTE(review): '%-d/%-m' is a glibc extension — not portable to
            # Windows strftime; confirm the job only runs on Linux/macOS.
            marker = actual_last_dates[t].strftime('%-d/%-m')
            df_latest.at[i, 'Description'] += f' ({marker})'

    # Markdown-style link column; Categorical pins rows to config order.
    df_latest['url'] = df_latest['ticker'].apply(lambda t: f'[{t}]({YAHOO_URL}{t})')
    df_latest['ticker'] = pd.Categorical(df_latest['ticker'], categories=DATA.keys(), ordered=True)
    df_latest = df_latest.sort_values('ticker').set_index('Description')

    df_latest['price'] = df_latest['price'].apply(format_number)

    # Novem plot
    visible_cols = ['price'] + list(HORIZONS.keys()) + ['url']
    data = df_latest[visible_cols]
    plt = Plot(section)
    # NOTE(review): type is set to 'table' here and to 'mtable' further down
    # after the data upload — confirm the second assignment is intentional.
    plt.type = 'table'
    plt.data = data

    print(plt.url)

    plt.cell.align = '''
    : 1: >
    : 0 <
    0 2:-2 -
    '''

    # One bottom border per configured group-separator row.
    plt.cell.border = ''
    for b in BORDER:
        plt.cell.border += f'{b} : b 1 gray-500\n'

    plt.cell.padding = '''
    : 1: l 4
    : :-2 r 4
    0 : t 2
    '''

    # Number formats: plain price, 1 decimal for 1d, whole percents beyond.
    plt.cell.format = ''
    plt.cell.format += S(data.loc[:, 'price'], ' ', data)
    plt.cell.format += S(data.loc[:, '1d'], ',.1%', data)
    plt.cell.format += S(data.loc[:, '1w':], ',.0%', data)

    # Highlight cells whose z-score exceeds ±1: green for up, red for down
    # (background plus matching text color).
    plt.colors.type = 'ix'
    plt.colors = ''
    for horizon in HORIZONS:
        z_col = f'z_{horizon}'
        col_index = data.columns.get_loc(horizon) + 1
        plt.colors += S(df_latest.loc[df_latest[z_col] >  1, [horizon]], SC('bg', 'green-100'), data, col_index)
        plt.colors += S(df_latest.loc[df_latest[z_col] < -1, [horizon]], SC('bg', 'red-100'), data, col_index)
        plt.colors += S(df_latest.loc[df_latest[z_col] >  1, [horizon]], SC('fg', 'green-700'), data, col_index)
        plt.colors += S(df_latest.loc[df_latest[z_col] < -1, [horizon]], SC('fg', 'red-700'), data, col_index)

    plt.name = TITLE + ' ' + t_latest.strftime('%d %b %Y')
    plt.title = ''
    plt.caption = f'''{TITLE}. Colors highlight moves greater than one standard deviation. Close prices as of {t_latest.strftime('%d %b %Y')}. Source: Yahoo! Finance, calculations by novem.'''
    plt.type = 'mtable'
    plt.shared += 'public'
    plt.x  # NOTE(review): bare attribute access with no assignment — possibly a leftover; confirm whether novem relies on this side effect

    # Publish this script's own source as the plot description.
    code_block = f'''```python
    {inspect.getsource(sys.modules[__name__])}
    ```'''
    plt.description = code_block

# %%

    ```