U.S. Census Data API in C

by Cyrus Gomes

U.S. Census API documentation: https://www.census.gov/data/developers/about.html

U.S. Census Data Discovery Tool: https://api.census.gov/data.html

These recipe examples were tested on 28 November, 2023.

See also the U.S. Census API Terms of Service

Attribution: This tutorial uses the Census Bureau Data API but is not endorsed or certified by the Census Bureau.

Setup

First, install the CURL package by typing the following command in the terminal:

!sudo apt install curl

Then, install the jq package by typing the following command in the terminal:

!sudo apt install jq

Now we create the US-Census directory, which will hold the files for our projects:

!mkdir US-Census

Finally, we change to our newly created directory:

%cd US-Census

API Key Information

While an API key is not required to use the U.S. Census Data API, you may consider registering for an API key as the API is limited to 500 calls a day without a key. Sign up can be found here: https://api.census.gov/data/key_signup.html

To use a key, pass it to the program with the -key [key] option.

1. Get population estimates of counties by state

Note: includes Washington, D.C. and Puerto Rico

First, we can create a directory for all the project files:

!mkdir api_results

Then, we can change to the new directory:

%cd api_results

We use the %%file command to create the following makefile, which compiles our program and creates an executable.

%%file makefile

# Set the variable CC to gcc, which is used to build the program
# (can be overridden on the command line, e.g. `make CC=clang`)
CC=gcc

# Enable debugging information and enable all compiler warnings
CFLAGS=-g -Wall

# Set the bin variable as the name of the binary file we are creating
BIN=census_data

# "all" and "clean" are command names, not files to be built. Declaring them
# phony keeps them working even if a file called "all" or "clean" exists.
.PHONY: all clean

# Default target: create the binary file with the name we put in BIN
all: $(BIN)

# Map any file ending in .c to a binary executable of the same name.
# "$<" represents the .c file and "$@" represents the target binary executable.
# The .c file is compiled with the CFLAGS and the resulting binary is linked
# with the CURL library. (Comments are kept outside the recipe: a tab-indented
# comment would be passed to the shell and echoed on every build.)
%: %.c
	$(CC) $(CFLAGS) $< -o $@ -lcurl

# Clean target: remove the binary file and any ".dSYM" (debug symbols)
# directories. $(RM) already expands to "rm -f" (force delete); -r is added
# so that directories are removed as well.
clean:
	$(RM) -r $(BIN) *.dSYM

The %%file command is used again to create our .c file, which contains the code for the program:

%%file census_data.c

#include <curl/curl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* CURL program that retrieves population estimate */

/* Size in bytes of the buffer that holds the request URL (including the NUL) */
#define URL_BUFFER_SIZE 1000

/*
 * Assemble the request URL into `out`.
 *
 *   out       destination buffer
 *   out_size  capacity of `out` in bytes
 *   id        dataset path and query that follows the API base address
 *   key       API key, appended as "&key=<key>"; NULL means no key is appended
 *
 * Returns 0 on success, or -1 if the URL does not fit into `out`.
 */
static int build_url(char *out, size_t out_size, const char *id, const char *key) {
    static const char api[] = "https://api.census.gov/data/";
    static const char key_ext[] = "&key=";
    int length;

    if (key != NULL) {
        length = snprintf(out, out_size, "%s%s%s%s", api, id, key_ext, key);
    } else {
        length = snprintf(out, out_size, "%s%s", api, id);
    }

    // snprintf returns the length it needed; a value >= out_size means truncation
    return (length < 0 || (size_t)length >= out_size) ? -1 : 0;
}

/*
 * Entry point: builds a Census API URL from the command line and downloads it.
 *
 * Accepted forms ([id] defaults to the list of all states when omitted):
 *   ./census_data -id
 *   ./census_data -id [id]
 *   ./census_data -key -id
 *   ./census_data -id -key [key]
 *   ./census_data -id [id] -key
 *   ./census_data -key -id [id]
 *   ./census_data -key [key] -id [id]
 *   ./census_data -id [id] -key [key]
 *
 * No write callback is installed, so libcurl prints the response body to
 * standard output (which is what allows piping the result into jq).
 *
 * Returns EXIT_SUCCESS when the transfer succeeds; a non-zero status when the
 * arguments are invalid, the URL is too long, or the transfer fails.
 */
int main (int argc, char* argv[]) {

    // Exit if no arguments were given at all
    if (argc < 2) {
        printf("Error. Please try again correctly. (./census_data -id [id] -key [key])\n");
        return -1;
    }

    // Default id: names and ids of all states
    const char label[] = "2019/pep/population?get=NAME&for=state:*";

    // Pieces selected by the command line; joined into the url afterwards
    const char *id = label;
    const char *key = NULL;
    char url[URL_BUFFER_SIZE];

    const int first_is_id = (strcmp(argv[1], "-id") == 0);
    const int first_is_key = (strcmp(argv[1], "-key") == 0);

    /* Here are the different ways of calling the program in the
    command line and integrating id and key fields */

    // Has the -id flag: ./census_data -id
    if ((argc == 2) && first_is_id) {
        // Default id, no key: nothing to change
    }

    // Has the -id flag and field: ./census_data -id [id]
    else if ((argc == 3) && first_is_id) {
        id = argv[2];
    }

    // Has the -key and -id flags: ./census_data -key -id
    else if ((argc == 3) && first_is_key && (strcmp(argv[2], "-id") == 0)) {
        // Default id, no key: nothing to change
    }

    // Has the -id and -key flags and the key field: ./census_data -id -key [key]
    else if ((argc == 4) && first_is_id && (strcmp(argv[2], "-key") == 0)) {
        key = argv[3];
    }

    // Has the -id and -key flags and the id field: ./census_data -id [id] -key
    else if ((argc == 4) && first_is_id && (strcmp(argv[3], "-key") == 0)) {
        id = argv[2];
    }

    // Has the -key and -id flags and the id field: ./census_data -key -id [id]
    else if ((argc == 4) && first_is_key && (strcmp(argv[2], "-id") == 0)) {
        id = argv[3];
    }

    // Has the -key and -id flags and the key and id fields: ./census_data -key [key] -id [id]
    else if ((argc == 5) && first_is_key && (strcmp(argv[3], "-id") == 0)) {
        id = argv[4];
        key = argv[2];
    }

    // Has the -id and -key flags and the id and key fields: ./census_data -id [id] -key [key]
    // (the first argument must really be -id; it was previously left unchecked)
    else if ((argc == 5) && first_is_id && (strcmp(argv[3], "-key") == 0)) {
        id = argv[2];
        key = argv[4];
    }

    // If the arguments are invalid then show the usage and report failure
    else {
        printf("./census_data -key [key] -id [id]\n");
        return EXIT_FAILURE;
    }

    // Join the pieces; refuse input that would overflow the url buffer
    if (build_url(url, sizeof url, id, key) != 0) {
        fprintf(stderr, "Error. The request URL is longer than %d characters.\n", URL_BUFFER_SIZE - 1);
        return EXIT_FAILURE;
    }

    // Initializes the CURL HTTP connection
    CURL *curl = curl_easy_init();

    // Check if CURL initialization is a success
    if (!curl) {
        fprintf(stderr, "init failed\n");
        return EXIT_FAILURE;
    }

    // Set the url to which the HTTP request will be sent
    // First parameter is for the initialized curl HTTP request, second for the option to be set, and third for the value to be set
    curl_easy_setopt(curl, CURLOPT_URL, url);

    // Send the request and wait for the transfer to finish
    CURLcode result = curl_easy_perform(curl);

    // If result is not retrieved then output error
    if (result != CURLE_OK) {
        fprintf(stderr, "download problem: %s\n", curl_easy_strerror(result));
    }

    // Deallocate memory for the CURL connection
    curl_easy_cleanup(curl);

    // Let callers (shell scripts, make) see whether the download worked
    return (result == CURLE_OK) ? EXIT_SUCCESS : EXIT_FAILURE;
}
!make
gcc -g -Wall census_data.c -o census_data -lcurl

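To obtain data from the Census API, it is helpful to first retrieve the list of state IDs. The command below counts the rows of that list (one header row plus 52 entries: the 50 states, Washington, D.C., and Puerto Rico):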

!./census_data -id  -key "" | jq ". | length"
53

Now we can print the header row followed by the first 5 state IDs:

!./census_data -id | jq ".[:6]"
[
  [
    "NAME",
    "state"
  ],
  [
    "Alabama",
    "01"
  ],
  [
    "Alaska",
    "02"
  ],
  [
    "Arizona",
    "04"
  ],
  [
    "Arkansas",
    "05"
  ],
  [
    "California",
    "06"
  ]
]

To remove the heading we implement the following command:

!./census_data -id | jq ".[1:6]"
[
  [
    "Alabama",
    "01"
  ],
  [
    "Alaska",
    "02"
  ],
  [
    "Arizona",
    "04"
  ],
  [
    "Arkansas",
    "05"
  ],
  [
    "California",
    "06"
  ]
]

Now we can loop through each state and pull their individual population data (first 10 lines printed):

%%bash

# Adapted from ChatGPT
# Builds county_dictionary: state name -> JSON object mapping each county
# name to its population (2019 population estimates)

# Create an associative array
declare -A county_dictionary

# Download the list of states once and reuse it inside the loop
# (row 0 is the header ["NAME", "state"]; every other row is [name, id])
states_name_id=$(./census_data -id "2019/pep/population?get=NAME&for=state:*" -key "")

# Sleep delay
sleep 1

# Get the number of rows in the list (header row included)
no_of_rows=$(echo "$states_name_id" | jq ". | length")

# Go through all the states: valid indices are 1 .. no_of_rows - 1,
# so the loop must stop before no_of_rows
for ((i = 1; i < no_of_rows; i++)); do

    # Get the individual states and state ids (-r prints them without quotes)
    state=$(echo "$states_name_id" | jq -r ". [$i][0]")
    state_id=$(echo "$states_name_id" | jq -r ". [$i][1]")

    # Get the county detail
    county_info=$(./census_data -id "2019/pep/population?get=NAME,POP&for=county:*&in=state:$state_id" -key "" | jq ".")

    # Sleep delay
    sleep 1

    # Modify retrieved json data to retrieve county and population only (without state)
    county_pop=$(echo "$county_info" | jq -r '.[1:] | map({(.[0] | split(", ")[0]): .[1]}) | add')

    # Sort the data alphabetically by county names
    sorted_county_pop=$(echo "$county_pop" | jq 'to_entries | sort_by(.key) | from_entries')

    # Add all the counties to the associative array
    county_dictionary["$state"]="$sorted_county_pop"
done

# Can be changed to any state
echo "${county_dictionary["Alabama"]}" | head -n 10
{
  "Autauga County": "55869",
  "Baldwin County": "223234",
  "Barbour County": "24686",
  "Bibb County": "22394",
  "Blount County": "57826",
  "Bullock County": "10101",
  "Butler County": "19448",
  "Calhoun County": "113605",
  "Chambers County": "33254",

2. Get population estimates over a range of years

We can use similar code as before, but now loop through different population estimate datasets by year. Here are the specific APIs used:

Vintage 2015 Population Estimates: https://api.census.gov/data/2015/pep/population/examples.html

Vintage 2016 Population Estimates: https://api.census.gov/data/2016/pep/population/examples.html

Vintage 2017 Population Estimates: https://api.census.gov/data/2017/pep/population/examples.html

%%bash

# Adapted from ChatGPT
# Works with dates up to 2018
# Builds county_dictionary: state name -> one JSON object per year, each of the
# form {"<year>": {county name: population, ...}}

# Store all the state names and ids
# (a plain string variable holding the JSON response; row 0 is the header)
states_name_id=$(./census_data -id "2019/pep/population?get=NAME&for=state:*" -key "")

# Sleep delay
sleep 1

# Get the number of rows in the list (header row included)
no_of_rows=$(echo "$states_name_id" | jq ". | length")

# Create an associative array
declare -A county_dictionary

# Separator placed between the yearly objects of one state
newline=$'\n'

# Loop through the years 2015, 2016, 2017
for ((year = 2015; year < 2018; year++)); do

    # Valid state indices are 1 .. no_of_rows - 1, so stop before no_of_rows
    for ((i = 1; i < no_of_rows; i++)); do

        # Get the individual state names and ids
        state=$(echo "$states_name_id" | jq -r ". [$i][0]")
        state_id=$(echo "$states_name_id" | jq -r ". [$i][1]")

        # Get the county details
        county_info=$(./census_data -id "$year/pep/population?get=GEONAME,POP&for=county:*&in=state:$state_id" -key "" | jq ".")

        # Sleep delay
        sleep 1

        # Modify retrieved json data to retrieve county and population only (without state)
        county_pop=$(echo "$county_info" | jq -r '.[1:] | map({(.[0] | split(", ")[0]): .[1]}) | add')

        # Sort the data alphabetically by county name
        sorted_county_pop=$(echo "$county_pop" | jq 'to_entries | sort_by(.key) | from_entries')

        # Wrap the retrieved data in an object keyed by the year
        # (--arg hands the year to jq as a string variable instead of
        # splicing shell text into the jq program)
        yearly_county_pop=$(echo "$sorted_county_pop" | jq --arg year "$year" '{($year): .}')

        # Add all of the counties to the associative array; later years are
        # appended on a new line after the earlier ones
        county_dictionary["$state"]+="${county_dictionary["$state"]:+$newline}$yearly_county_pop"
    done
done

# Can be changed to any state
echo "${county_dictionary["Alabama"]}" | head -n 10
{
  "2015": {
    "Autauga County": "55347",
    "Baldwin County": "203709",
    "Barbour County": "26489",
    "Bibb County": "22583",
    "Blount County": "57673",
    "Bullock County": "10696",
    "Butler County": "20154",
    "Calhoun County": "115620",