U.S. Census Data API in C#
by Cyrus Gomes
U.S. Census API documentation: https://www.census.gov/data/developers/about.html
U.S. Census Data Discovery Tool: https://api.census.gov/data.html
These recipe examples were tested on 28 November, 2023.
See also the U.S. Census API Terms of Service
Attribution: This tutorial uses the Census Bureau Data API but is not endorsed or certified by the Census Bureau.
Setup#
First, install the CURL package by typing the following command in the terminal:
!sudo apt install curl
Then, install the jq package by typing the following command in the terminal:
!sudo apt install jq
Now we create the US-Census directory that will hold our project files:
!mkdir US-Census
Finally, we change to our newly created directory:
%cd US-Census
API Key Information#
While an API key is not required to use the U.S. Census Data API, you may consider registering for an API key as the API is limited to 500 calls a day without a key. Sign up can be found here: https://api.census.gov/data/key_signup.html
A key can be supplied on the command line with the -key [key] option.
1. Get population estimates of counties by state#
Note: includes Washington, D.C. and Puerto Rico
First, we can create a directory for all the project files:
!mkdir api_results
Then, we can change to the new directory:
%cd api_results
We use the %%file cell magic to write the following makefile, which compiles our program into an executable.
%%file makefile
# Compiler used to build the program
CC=gcc
# Emit debugging information (-g) and enable the common compiler warnings (-Wall)
CFLAGS=-g -Wall
# Name of the binary to build; the pattern rule below builds it from $(BIN).c
BIN=census_data
# "all" and "clean" are commands, not files: declare them phony so that a stray
# file named "all" or "clean" can never prevent them from running
.PHONY: all clean
# Default target: build the binary
all: $(BIN)
# Pattern rule that maps any file ending in .c to an executable of the same name.
# "$<" is the .c file and "$@" is the target executable; the result is linked
# against the CURL library. NOTE: every recipe line must start with a TAB.
%: %.c
	$(CC) $(CFLAGS) $< -o $@ -lcurl
# Remove the binary and any ".dSYM" (debug symbols) directories;
# -r removes directories and -f forces deletion without complaining
clean:
	$(RM) -rf $(BIN) *.dSYM
The %%file cell magic is used again to create our .c file, which contains the code for the program:
%%file census_data.c
#include <curl/curl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * CURL program that retrieves data from the U.S. Census Data API.
 *
 * Usage: ./census_data -id [id] -key [key]
 *
 *   -id  [id]   path and query appended to https://api.census.gov/data/ ;
 *               when the value is omitted, the default state-list query
 *               (2019 population estimates, all states) is used
 *   -key [key]  API key appended as "&key=[key]"; when the value is omitted
 *               no key is sent
 *
 * The response body is written by libcurl to stdout (its default), so the
 * output can be piped straight into jq. All diagnostics go to stderr so that
 * they never end up in that pipe.
 *
 * Returns EXIT_SUCCESS when the transfer succeeded, EXIT_FAILURE on invalid
 * arguments, an over-long URL, CURL initialization failure or transfer failure.
 */
int main (int argc, char* argv[]) {
    // Bits of the url that are joined together later
    const char api[] = "https://api.census.gov/data/";
    const char label[] = "2019/pep/population?get=NAME&for=state:*";
    const char key_ext[] = "&key=";
    char url[1000];

    // Selected by the argument parsing below; key stays NULL when none is given
    const char *id = NULL;
    const char *key = NULL;

    // Exit if arguments are invalid
    if (argc < 2) {
        fprintf(stderr, "Error. Please try again correctly. (./census_data -id [id] -key [key])\n");
        return EXIT_FAILURE;
    }

    /* Here are the different ways of calling the program on the
       command line and integrating the id and key fields */

    // Has the -id flag only: ./census_data -id
    if ((argc == 2) && (strcmp(argv[1], "-id") == 0)) {
        id = label;
    }
    // Has the -id flag and field: ./census_data -id [id]
    // (./census_data -id -key carries no values, so it falls back to the
    // default id instead of treating the literal "-key" as an id)
    else if ((argc == 3) && (strcmp(argv[1], "-id") == 0)) {
        id = (strcmp(argv[2], "-key") == 0) ? label : argv[2];
    }
    // Has the -key and -id flags without values: ./census_data -key -id
    else if ((argc == 3) && (strcmp(argv[1], "-key") == 0) && (strcmp(argv[2], "-id") == 0)) {
        id = label;
    }
    // Has the -id and -key flags and the key field: ./census_data -id -key [key]
    else if ((argc == 4) && (strcmp(argv[1], "-id") == 0) && (strcmp(argv[2], "-key") == 0)) {
        id = label;
        key = argv[3];
    }
    // Has the -id and -key flags and the id field: ./census_data -id [id] -key
    else if ((argc == 4) && (strcmp(argv[1], "-id") == 0) && (strcmp(argv[3], "-key") == 0)) {
        id = argv[2];
    }
    // Has the -key and -id flags and the id field: ./census_data -key -id [id]
    else if ((argc == 4) && (strcmp(argv[1], "-key") == 0) && (strcmp(argv[2], "-id") == 0)) {
        id = argv[3];
    }
    // Has both flags and both fields: ./census_data -key [key] -id [id]
    else if ((argc == 5) && (strcmp(argv[1], "-key") == 0) && (strcmp(argv[3], "-id") == 0)) {
        id = argv[4];
        key = argv[2];
    }
    // Has both flags and both fields: ./census_data -id [id] -key [key]
    else if ((argc == 5) && (strcmp(argv[1], "-id") == 0) && (strcmp(argv[3], "-key") == 0)) {
        id = argv[2];
        key = argv[4];
    }
    // Anything else is invalid: show the usage and report failure
    else {
        fprintf(stderr, "./census_data -key [key] -id [id]\n");
        return EXIT_FAILURE;
    }

    // Join the pieces into the final url. snprintf never writes past the end
    // of the buffer; a return value >= sizeof url means the url was truncated,
    // which would silently request the wrong resource, so treat it as an error.
    int written;
    if (key != NULL) {
        written = snprintf(url, sizeof url, "%s%s%s%s", api, id, key_ext, key);
    } else {
        written = snprintf(url, sizeof url, "%s%s", api, id);
    }
    if (written < 0 || (size_t)written >= sizeof url) {
        fprintf(stderr, "Error. The requested url is too long.\n");
        return EXIT_FAILURE;
    }

    // Initializes the CURL HTTP connection
    CURL *curl = curl_easy_init();
    // Check if CURL initialization is a success
    if (!curl) {
        fprintf(stderr, "init failed\n");
        return EXIT_FAILURE;
    }

    // Set the url to which the HTTP request will be sent
    // First parameter is the initialized curl handle, second the option to be set, and third the value to be set
    curl_easy_setopt(curl, CURLOPT_URL, url);

    // Perform the request; the response body is written to stdout
    CURLcode result = curl_easy_perform(curl);
    // If result is not retrieved then output error
    if (result != CURLE_OK) {
        fprintf(stderr, "download problem: %s\n", curl_easy_strerror(result));
    }

    // Deallocate memory for the CURL connection
    curl_easy_cleanup(curl);

    // Propagate the outcome of the transfer to the caller
    return (result == CURLE_OK) ? EXIT_SUCCESS : EXIT_FAILURE;
}
!make
gcc -g -Wall census_data.c -o census_data -lcurl
For obtaining data from the Census API, it is helpful to first obtain a list of state IDs:
!./census_data -id -key "" | jq ". | length"
53
Now we can print the header row followed by the first 5 states and their IDs:
!./census_data -id | jq ".[:6]"
[
[
"NAME",
"state"
],
[
"Alabama",
"01"
],
[
"Alaska",
"02"
],
[
"Arizona",
"04"
],
[
"Arkansas",
"05"
],
[
"California",
"06"
]
]
To remove the heading we implement the following command:
!./census_data -id | jq ".[1:6]"
[
[
"Alabama",
"01"
],
[
"Alaska",
"02"
],
[
"Arizona",
"04"
],
[
"Arkansas",
"05"
],
[
"California",
"06"
]
]
Now we can loop through each state and pull their individual population data (first 10 lines printed):
%%bash
# Adapted from ChatGPT
# Builds county_dictionary: state name -> JSON object mapping each county
# name to its population, then prints the first 10 lines for one state.

# Create an associative array
declare -A county_dictionary

# Download the list of states ONCE and reuse it for every lookup below;
# re-requesting the same list for each state wastes API calls (the API is
# limited to 500 calls a day without a key)
states_json=$(./census_data -id "2019/pep/population?get=NAME&for=state:*" -key "")
# Sleep delay
sleep 1

# Number of rows in the response; row 0 is the ["NAME", "state"] header
no_of_rows=$(jq 'length' <<<"$states_json")
if ! [[ "$no_of_rows" =~ ^[0-9]+$ ]]; then
  echo "Could not retrieve the list of states" >&2
  exit 1
fi

# Go through all the states. Row 0 is the header, so the data rows are
# 1 .. no_of_rows-1; using "<=" here would read one row past the end and
# issue a request for a non-existent state called "null"
for ((i = 1; i < no_of_rows; i++)); do
  # Get the individual state name and state id (-r prints them without quotes)
  state=$(jq -r ".[$i][0]" <<<"$states_json")
  state_id=$(jq -r ".[$i][1]" <<<"$states_json")

  # Get the county detail, keep only county and population (dropping the
  # ", State" suffix of each name) and sort alphabetically by county name
  sorted_county_pop=$(
    ./census_data -id "2019/pep/population?get=NAME,POP&for=county:*&in=state:$state_id" -key "" \
      | jq '.[1:] | map({(.[0] | split(", ")[0]): .[1]}) | add | to_entries | sort_by(.key) | from_entries'
  )
  # Sleep delay
  sleep 1

  # Add all the counties to the associative array
  county_dictionary["$state"]+="$sorted_county_pop"
done

# Can be changed to any state
printf '%s\n' "${county_dictionary["Alabama"]}" | head -n 10
{
"Autauga County": "55869",
"Baldwin County": "223234",
"Barbour County": "24686",
"Bibb County": "22394",
"Blount County": "57826",
"Bullock County": "10101",
"Butler County": "19448",
"Calhoun County": "113605",
"Chambers County": "33254",
2. Get population estimates over a range of years#
We can use similar code as before, but now loop through different population estimate datasets by year. Here are the specific APIs used:
Vintage 2015 Population Estimates: https://api.census.gov/data/2015/pep/population/examples.html
Vintage 2016 Population Estimates: https://api.census.gov/data/2016/pep/population/examples.html
Vintage 2017 Population Estimates: https://api.census.gov/data/2017/pep/population/examples.html
%%bash
# Adapted from ChatGPT
# Builds county_dictionary: state name -> one JSON object per year, each of
# the form {"<year>": {county name: population, ...}}, then prints the first
# 10 lines for one state.
# Works with dates up to 2018
# NOTE(review): presumably because later vintages expose NAME instead of
# GEONAME (the 2019 query below uses NAME) - confirm against the API docs

# Store all the state names and ids. The raw JSON response is kept in a plain
# string variable (not an array) and queried with jq where needed
states_name_id=$(./census_data -id "2019/pep/population?get=NAME&for=state:*" -key "")
# Sleep delay
sleep 1

# Number of rows in the response; row 0 is the ["NAME", "state"] header
no_of_rows=$(jq 'length' <<<"$states_name_id")
if ! [[ "$no_of_rows" =~ ^[0-9]+$ ]]; then
  echo "Could not retrieve the list of states" >&2
  exit 1
fi

# Create an associative array
declare -A county_dictionary

# Loop through the years 2015, 2016, 2017
for ((year = 2015; year < 2018; year++)); do
  # Row 0 is the header, so the data rows are 1 .. no_of_rows-1; using "<="
  # here would read one row past the end and issue a request per year for a
  # non-existent state called "null"
  for ((i = 1; i < no_of_rows; i++)); do
    # Get the individual state names and ids (-r prints them without quotes)
    state=$(jq -r ".[$i][0]" <<<"$states_name_id")
    state_id=$(jq -r ".[$i][1]" <<<"$states_name_id")

    # Get the county details, keep only county and population (dropping the
    # ", State" suffix of each name), sort alphabetically by county name and
    # wrap the result under the year. The year is handed to jq with --arg
    # rather than being spliced into the jq program text
    yearly_county_pop=$(
      ./census_data -id "$year/pep/population?get=GEONAME,POP&for=county:*&in=state:$state_id" -key "" \
        | jq --arg year "$year" \
          '.[1:] | map({(.[0] | split(", ")[0]): .[1]}) | add | to_entries | sort_by(.key) | from_entries | {($year): .}'
    )
    # Sleep delay
    sleep 1

    # Add all of the counties to the associative array
    county_dictionary["$state"]+="$yearly_county_pop"
  done
done

# Can be changed to any state
printf '%s\n' "${county_dictionary["Alabama"]}" | head -n 10
{
"2015": {
"Autauga County": "55347",
"Baldwin County": "203709",
"Barbour County": "26489",
"Bibb County": "22583",
"Blount County": "57673",
"Bullock County": "10696",
"Butler County": "20154",
"Calhoun County": "115620",