Explore
Groups
No topics yet. Start the conversation.
Summary
User-supplied summary for the plot
Population by age group in the five most populous US states. Source: US Census Bureau.
Description
The description below is supplied by the user as free text
import pandas as pd
from novem import Plot
import inspect
import sys
# Module-level configuration
POPULATION_DIVISOR = 1_000_000.0  # raw counts are divided by this to express them in millions
CENSUS_YEAR = 2020  # year segment of the Census API path
CENSUS_API_BASE = "https://api.census.gov/data"  # root the query URL is built on
def census_codes(start: int, end: int) -> list[str]:
    """Return zero-padded census codes from start to end inclusive.

    Args:
        start: First numeric code in the run.
        end: Last numeric code in the run (included in the result).

    Returns:
        The codes as three-digit strings, e.g. ``census_codes(3, 5)`` gives
        ``["003", "004", "005"]``. The list is empty when ``end < start``.
    """
    return [f"{x:03d}" for x in range(start, end + 1)]
def census_code_pair(male: tuple[int, int], female: tuple[int, int]) -> list[str]:
    """Expand male and female census ranges into combined codes.

    Args:
        male: ``(start, end)`` inclusive code range for the male rows.
        female: ``(start, end)`` inclusive code range for the female rows.

    Returns:
        The zero-padded male codes followed by the zero-padded female codes.
    """
    return census_codes(*male) + census_codes(*female)
# Define age brackets (male+female ranges combined).
# Each entry pairs a display label with the variable numbers that make up the
# bracket: the first range is passed as `male`, the second as `female`.
# NOTE(review): the first label reads "0-5" while the next bracket starts at 5;
# its codes appear to cover under-5s only - confirm against the B01001
# variable list before renaming the label.
AGE_BRACKETS = [
    ("0-5", census_code_pair((3, 3), (27, 27))),
    ("5-14", census_code_pair((4, 5), (28, 29))),
    ("15-17", census_code_pair((6, 6), (30, 30))),
    ("18-24", census_code_pair((7, 10), (31, 34))),
    # The female range starts at 35: code 34 already belongs to "18-24", and
    # listing it twice would request it twice and count it in two brackets.
    ("25-44", census_code_pair((11, 14), (35, 38))),
    ("45-64", census_code_pair((15, 19), (39, 43))),
    ("65+", census_code_pair((20, 25), (44, 49))),
]
# Bracket labels in the order they are declared above (youngest first).
AGE_ORDER = [label for label, _ in AGE_BRACKETS]
# Build mapping table: one row per census variable, tagged with its age bracket
code_map = pd.DataFrame(
    [
        (bracket_label, f"B01001_{suffix}E")
        for bracket_label, suffixes in AGE_BRACKETS
        for suffix in suffixes
    ],
    columns=["bracket", "code"],
)
# Build Census API URL: every mapped variable is requested for all states in one call
codes_str = code_map["code"].str.cat(sep=",")
census_url = f"{CENSUS_API_BASE}/{CENSUS_YEAR}/acs/acs5?get={codes_str}&for=state:*"
# Fetch and reshape data.
# The first row of the response is promoted to column labels and then removed.
raw_df = pd.read_json(census_url)
header_row = raw_df.iloc[0]
raw_df = raw_df.iloc[1:].reset_index(drop=True)
raw_df.columns = header_row
# Go from wide (one column per requested variable) to long (one row per
# state/variable pair). The stacked level surfaces as a column labelled 0,
# which is renamed to "code"; the values land in "pop".
stacked_counts = raw_df.set_index("state").stack()
raw_df = stacked_counts.reset_index(name="pop").rename(columns={0: "code"})
# Attach the age bracket to every row. With no explicit key pandas joins on the
# columns the two frames share, which here is "code".
merged_df = raw_df.merge(code_map)
# Fetch state names from Wikipedia: take the first table parsed from the page
# and expose its "Numeric code" column under the name "FIPS"
fips_tables = pd.read_html(
    "https://en.wikipedia.org/wiki/Federal_Information_Processing_Standard_state_code"
)
state_table = fips_tables[0].rename(columns={"Numeric code": "FIPS"})
# Add FIPS integer column (nullable Int64, so values that fail to parse become <NA>)
state_codes = pd.to_numeric(merged_df["state"], errors="coerce")
merged_df["state_fips_int"] = state_codes.astype("Int64")
# Merge with state names; the left join keeps every census row
full_data = pd.merge(
    merged_df,
    state_table,
    how="left",
    left_on="state_fips_int",
    right_on="FIPS",
)
# Convert population to millions
full_data["pop"] = full_data["pop"].astype(float).div(POPULATION_DIVISOR)
# Make bracket categorical and ordered, following the order of AGE_ORDER
bracket_dtype = pd.CategoricalDtype(categories=AGE_ORDER, ordered=True)
full_data["bracket"] = full_data["bracket"].astype(bracket_dtype)
# Pivot: states as rows, brackets as columns, summed population in the cells
state_by_bracket = full_data.pivot_table(
    index="Name", columns="bracket", values="pop", aggfunc="sum"
)
# Order the rows by total population using a temporary "total" column
row_totals = state_by_bracket.sum(axis=1)
pivot_data = (
    state_by_bracket.assign(total=row_totals)
    .sort_values("total", ascending=False)
    .drop(columns="total")
)
# Select top 5 states by population
top_states = pivot_data.head(5)
# Construct Novem plot
plot_caption = (
    "The 5 most populous states in the US. "
    "Data from the Census Bureau Data API, but not endorsed or certified by the Census Bureau. "
    "Calculations by novem."
)
# Keyword arguments are gathered first so the constructor call stays short
plot_options = {
    "type": "gbar",
    "name": "Population (millions) by age in top five US states by population",
    "caption": plot_caption,
}
barchart = Plot("state_pop", **plot_options)
# Add 'public' to the plot's sharing settings
barchart.shared += 'public'
# Send data to Novem: calling the plot with the frame is what DataFrame.pipe does
barchart(top_states)
print(barchart.url) # https://novem.no/p/qNGgN
# Include this python script in Description, wrapped in a fenced python code block
script_source = inspect.getsource(sys.modules[__name__])
barchart.description = "".join(['```python\n', script_source, '\n```'])