Explore
Groups
No topics yet. Start the conversation.
Summary
User-supplied summary for the plot
Covid19 cases by US State from NYT 2025
Description
The description below is supplied as free text by the user
import inspect
import sys

import numpy as np # numpy for some number manipulation
import pandas as pd # pandas for loading and reshaping the data
from novem import Plot # get the novem plot
# NOTE: both CSV files below are downloaded live from repositories hosted
# by third parties. Their contents can change at any time, so consider
# keeping a local copy instead.

# US covid rolling averages per state, as standardized by the NYT
nyt_url = ("https://raw.githubusercontent.com/nytimes/"
           "covid-19-data/master/rolling-averages/us-states.csv")
df = pd.read_csv(nyt_url)

# lookup table mapping each state to its census region
regions_url = ("https://raw.githubusercontent.com/cphalpert/"
               "census-regions/master/us%20census%20bureau%20regions%20and%20divisions.csv")
cmap = pd.read_csv(regions_url)

# attach the State / Region columns to every NYT row; the left join keeps
# NYT rows that have no match in the region table
data = df.merge(
    cmap[['State', 'Region']],
    how='left',
    left_on='state',
    right_on='State',
)

# Some locations have no region mapping and are ignored for now. To see
# which ones, inspect the unmatched rows before they are dropped, e.g.:
# missing = data.loc[pd.isna(data.Region),'state'].unique()

# keep only the rows that received a region
data = data[data['Region'].notna()].copy()
# Estimate each state's population from the data itself: average cases
# divided by (average cases per 100k / 100k) is a rough proxy for the
# number of inhabitants.
data['pop'] = data['cases_avg'] / (data['cases_avg_per_100k'] / 1e5)

# A zero per-100k rate makes the division produce inf (or NaN for 0/0);
# treat both as "population unknown for this row".
data.loc[np.isinf(data['pop']), 'pop'] = np.nan

# No case data means no cases: zero-fill every column EXCEPT 'pop'.
# Filling 'pop' with 0 here would leave nothing for the fill below to
# carry over, and would understate regional population totals on days
# where a state reports zero cases.
data = data.fillna({col: 0 for col in data.columns if col != 'pop'})

# Carry each state's population estimate backwards over the unknown rows,
# then forwards to cover any unknown rows at the end of the series.
# (assumes rows within a state are in chronological order, as delivered
# by the source CSV -- TODO confirm)
data['pop'] = data.groupby('state')['pop'].transform(
    lambda s: s.bfill().ffill()
)
# Region aggregates: for every date and census region, sum the average
# case counts and the population estimates.
pdata = data.pivot_table(
    index='date',
    columns='Region',
    values=['cases_avg', 'pop'],
    aggfunc='sum',
)

# Regional average cases per 100k; dates with a missing value in any
# region are dropped.
df = pdata['cases_avg'].div(pdata['pop']).mul(1e5).dropna()

# Per-state overview of the same metric. Each (date, State) cell should
# hold a single value, so the mean simply passes it through.
sdata = data.pivot_table(
    index='date',
    columns='State',
    values='cases_avg_per_100k',
    aggfunc='mean',
).fillna(0)

# restrict both tables to the same date window
sd = '2020-03-10'
ed = df.index.max()
df = df.loc[sd:ed]
sdf = sdata.loc[sd:ed]

# the state with the highest value on the last date in the window
latest = sdf.iloc[-1]
ws = latest.sort_values(ascending=False).index[0]

# put that state next to the regional series for reference
adf = pd.concat([df, sdf[ws]], axis=1)
# Construct the novem plot; if the name already exists it will be updated.
linechart = Plot(
    "covid_us_trend",
    type="line",
    title="Covid19 cases by US State",
    name="Covid19 cases by US State",
    # Adjacent string literals are joined with no separator, so every
    # fragment must carry its own trailing space.
    caption="This chart shows how average daily cases per capita "
            "have changed in different parts of the United States. "
            "The state with the highest recent average cases per "
            f"capita ({ws}) is shown. Data from the New York Times, "
            "calculations by New York Times, Novem. "
            "Data last updated 23 March 2023",
)

# send data to novem
adf.pipe(linechart)
linechart.shared += 'public'

# Include this python script in the description as a fenced code block.
# The opening fence must be followed by a newline: anything else on the
# fence line is read as the fence's info string, which would swallow the
# first line of the script. The closing fence must start its own line.
source = inspect.getsource(sys.modules[__name__])
if not source.endswith('\n'):
    source += '\n'
linechart.description = '```python\n' + source + '```'

linechart.summary = "Covid19 cases by US State from NYT 2025"

# get novem url
print(linechart.url) # https://novem.no/p/Kwjdv