No topics yet. Start the conversation.

Summary

User-supplied summary for the plot

Covid19 cases by US State from NYT 2025

Description

The description below is supplied as free text by the user
import inspect
import sys

import numpy as np      # numpy for some number manipulation
import pandas as pd     # pandas for loading and reshaping the data
from novem import Plot  # get the novem plot

# NOTE: both CSV files below are fetched live from repositories hosted by
#       others; their contents can change at any time, so consider
#       keeping a local copy.

# US covid rolling averages per state, standardized by NYT
df = pd.read_csv(
    "https://raw.githubusercontent.com/nytimes/"
    "covid-19-data/master/rolling-averages/us-states.csv"
)

# lookup table mapping states to census regions
cmap = pd.read_csv(
    "https://raw.githubusercontent.com/cphalpert/"
    "census-regions/master/us%20census%20bureau%20regions%20and%20divisions.csv"
)

# attach the region to every NYT row; the left join keeps rows whose state
# has no match in the lookup table (their Region ends up as NaN)
data = df.merge(
    cmap[['State', 'Region']],
    how='left',
    left_on='state',
    right_on='State',
)

# some locations have no region mapping and are ignored for now;
# uncomment the line below to list them
# missing = data.loc[pd.isna(data.Region),'state'].unique()

# keep only the rows that received a region
data = data[data['Region'].notna()].copy()

# Estimate each state's population from the data itself: average cases
# divided by average cases per 100k (rescaled) is a rough proxy for it.
data['pop'] = data['cases_avg'] / (data['cases_avg_per_100k'] / 1e5)

# A zero per-100k value makes the ratio infinite (or NaN when the case
# count is zero as well); neither is usable, so mark infinities as missing.
data.loc[np.isinf(data['pop']), 'pop'] = np.nan

# Missing case data means no cases. 'pop' is deliberately left out of this
# fill: zero-filling it here would leave nothing for the back-fill below
# to replace, so days without cases would count as population 0.
data = data.fillna({col: 0 for col in data.columns if col != 'pop'})

# Carry each state's estimate backwards over rows without one, then
# forwards over any trailing gap (assumes rows are in date order within
# each state -- TODO confirm for the source CSV).
data['pop'] = data.groupby('state')['pop'].transform(
    lambda s: s.bfill().ffill()
)

# A state with no estimate on any row falls back to 0 so no NaN remains.
data['pop'] = data['pop'].fillna(0)


# sum cases and estimated population per region for every date
pdata = data.pivot_table(
    values=['cases_avg', 'pop'],
    index='date',
    columns='Region',
    aggfunc='sum',
)

# regional cases per 100k: summed cases over summed population, rescaled;
# dates with a missing value for any region are dropped
region_cases = pdata['cases_avg']
region_pop = pdata['pop']
df = (region_cases / region_pop * 1e5).dropna()

# per-state table of cases per 100k, one column per state; each state/date
# pair should hold a single value, so the mean simply passes it through
sdata = data.pivot_table(
    values='cases_avg_per_100k',
    index='date',
    columns='State',
    aggfunc='mean',
)
sdata = sdata.fillna(0)

# restrict both tables to the same window: from the start date through
# the last date present in the regional table
sd = '2020-03-10'
ed = df.index.max()

sdf = sdata.loc[sd:ed]
df = df.loc[sd:ed]

# the state with the highest value on the final row of the window
latest = sdf.iloc[-1]
ws = latest.sort_values(ascending=False).index[0]

# regional series plus that state's series side by side, for reference
adf = pd.concat([df, sdf[ws]], axis=1)



# construct novem plot, if the name already exists it will
# be updated
linechart = Plot(
    "covid_us_trend",
    type="line",
    title="Covid19 cases by US State",
    name="Covid19 cases by US State",
    # Adjacent literals are concatenated as-is, so every fragment that is
    # followed by another word must end with a space.
    caption="This chart shows how average daily cases per capita "
            "have changed in different parts of the United States. "
            "The state with the highest recent average cases per "
            f"capita ({ws}) is shown. Data from the New York Times, "
            "calculations by New York Times, Novem. "
            "Data last updated 23 March 2023",
)

# send data to novem
adf.pipe(linechart)
linechart.shared += 'public'

# Include this python script in the description as a fenced code block.
# Both fences sit on their own line: text on the opening fence line is
# treated as the fence's info string, so the script's first line would
# otherwise be dropped from the rendered code.
script_source = inspect.getsource(sys.modules[__name__])
linechart.description = '```python\n' + script_source.rstrip('\n') + '\n```'
linechart.summary = "Covid19 cases by US State from NYT 2025"

# get novem url
print(linechart.url) # https://novem.no/p/Kwjdv