CAS Common Chemistry API in Python#

by Vincent F. Scalfani

These recipe examples were tested on March 23, 2022.

CAS Common Chemistry API Documentation (requires registration): https://www.cas.org/services/commonchemistry-api

Attribution: This tutorial uses the CAS Common Chemistry API. Example data shown is licensed under the CC BY-NC 4.0 license.

1. Common Chemistry Record Detail Retrieval#

Information about substances in CAS Common Chemistry can be retrieved by querying the /detail API endpoint with a CAS Registry Number (CAS RN) identifier.

Import libraries#

import json
import requests
from pprint import pprint

Setup API parameters#

# Base URL of the CAS Common Chemistry /detail endpoint (ends with "?" so a query string can follow)
detail_base_url = "https://commonchemistry.cas.org/api/detail?"
casrn1 = "10094-36-7" # CAS RN of ethyl cyclohexanepropionate

Request data from CAS Common Chemistry Detail API#

# Query the /detail endpoint for a single CAS RN.
# Passing the identifier via `params` lets requests build and URL-encode the
# query string; the timeout keeps the call from hanging indefinitely.
response = requests.get(detail_base_url, params={"cas_rn": casrn1}, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error body as data
response.raise_for_status()
casrn1_data = response.json()
pprint(casrn1_data)
{'canonicalSmile': 'O=C(OCC)CCC1CCCCC1',
 'experimentalProperties': [{'name': 'Boiling Point',
                             'property': '105-113 °C @ Press: 17 Torr',
                             'sourceNumber': 1}],
 'hasMolfile': True,
 'image': '<svg width="228.6" viewBox="0 0 7620 3716" text-rendering="auto" '
          'stroke-width="1" stroke-opacity="1" stroke-miterlimit="10" '
          'stroke-linejoin="miter" stroke-linecap="square" '
          'stroke-dashoffset="0" stroke-dasharray="none" stroke="black" '
          'shape-rendering="auto" image-rendering="auto" height="111.48" '
          'font-weight="normal" font-style="normal" font-size="12" '
          'font-family="\'Dialog\'" fill-opacity="1" fill="black" '
          'color-rendering="auto" color-interpolation="auto" '
          'xmlns="http://www.w3.org/2000/svg"><g><g stroke="white" '
          'fill="white"><rect y="0" x="0" width="7620" stroke="none" '
          'height="3716"/></g><g transform="translate(32866,32758)" '
          'text-rendering="geometricPrecision" stroke-width="44" '
          'stroke-linejoin="round" stroke-linecap="round"><line y2="-30850" '
          'y1="-31419" x2="-30792" x1="-31777" fill="none"/><line y2="-29715" '
          'y1="-30850" x2="-30792" x1="-30792" fill="none"/><line y2="-31419" '
          'y1="-30850" x2="-31777" x1="-32762" fill="none"/><line y2="-29146" '
          'y1="-29715" x2="-31777" x1="-30792" fill="none"/><line y2="-30850" '
          'y1="-29715" x2="-32762" x1="-32762" fill="none"/><line y2="-29715" '
          'y1="-29146" x2="-32762" x1="-31777" fill="none"/><line y2="-31376" '
          'y1="-30850" x2="-29885" x1="-30792" fill="none"/><line y2="-30850" '
          'y1="-31376" x2="-28978" x1="-29885" fill="none"/><line y2="-31376" '
          'y1="-30850" x2="-28071" x1="-28978" fill="none"/><line y2="-30960" '
          'y1="-31376" x2="-27352" x1="-28071" fill="none"/><line y2="-31376" '
          'y1="-30960" x2="-26257" x1="-26976" fill="none"/><line y2="-30850" '
          'y1="-31376" x2="-25350" x1="-26257" fill="none"/><line y2="-32202" '
          'y1="-31376" x2="-28140" x1="-28140" fill="none"/><line y2="-32202" '
          'y1="-31376" x2="-28002" x1="-28002" fill="none"/><text y="-30671" '
          'xml:space="preserve" x="-27317" stroke="none" font-size="433.3333" '
          'font-family="sans-serif">O</text><text y="-32242" '
          'xml:space="preserve" x="-28224" stroke="none" font-size="433.3333" '
          'font-family="sans-serif">O</text></g></g></svg>',
 'inchi': 'InChI=1S/C11H20O2/c1-2-13-11(12)9-8-10-6-4-3-5-7-10/h10H,2-9H2,1H3',
 'inchiKey': 'InChIKey=NRVPMFHPHGBQLP-UHFFFAOYSA-N',
 'molecularFormula': 'C<sub>11</sub>H<sub>20</sub>O<sub>2</sub>',
 'molecularMass': '184.28',
 'name': 'Ethyl cyclohexanepropionate',
 'propertyCitations': [{'docUri': 'document/pt/document/22252593',
                        'source': 'De Benneville, Peter L.; Journal of the '
                                  'American Chemical Society, (1940), 62, '
                                  '283-7, CAplus',
                        'sourceNumber': 1}],
 'replacedRns': [],
 'rn': '10094-36-7',
 'smile': 'C(CC(OCC)=O)C1CCCCC1',
 'synonyms': ['Cyclohexanepropanoic acid, ethyl ester',
              'Cyclohexanepropionic acid, ethyl ester',
              'Ethyl cyclohexanepropionate',
              'Ethyl cyclohexylpropanoate',
              'Ethyl 3-cyclohexylpropionate',
              'Ethyl 3-cyclohexylpropanoate',
              '3-Cyclohexylpropionic acid ethyl ester',
              'NSC 71463',
              'Ethyl 3-cyclohexanepropionate'],
 'uri': 'substance/pt/10094367'}

Display the Molecule Drawing#

from IPython.display import SVG

# Pull the SVG markup for the structure drawing out of the API response
svg_string1 = casrn1_data["image"]

# Render the drawing as the cell's output
SVG(data=svg_string1)
../../_images/a65670381e4045b19ed2ec31282b93988bd81cbbb23b82ca3a1b99c8d51d5af7.svg

Select some specific data#

# Get the first experimental property record (a dict with name, property, and sourceNumber)
casrn1_data["experimentalProperties"][0]
{'name': 'Boiling Point',
 'property': '105-113 °C @ Press: 17 Torr',
 'sourceNumber': 1}
# Get the "property" value of the first experimental property record
# (for this substance, that record is the boiling point)
casrn1_data["experimentalProperties"][0]["property"]
'105-113 °C @ Press: 17 Torr'
# Get InChIKey (the returned string includes the "InChIKey=" prefix)
casrn1_data["inchiKey"]
'InChIKey=NRVPMFHPHGBQLP-UHFFFAOYSA-N'
# Get Canonical SMILES (note the response key is spelled "canonicalSmile")
casrn1_data["canonicalSmile"]
'O=C(OCC)CCC1CCCCC1'

2. Common Chemistry API record detail retrieval in a loop#

Import libraries#

import json
import requests
from pprint import pprint
from time import sleep

Setup API parameters#

# Base URL of the CAS Common Chemistry /detail endpoint (ends with "?" so a query string can follow)
detail_base_url = "https://commonchemistry.cas.org/api/detail?"
# CAS RNs to look up
casrn_list = ["10094-36-7", "10031-92-2", "10199-61-8", "10036-21-2", "1019020-13-3"]

Request data for each CAS RN and save to a list#

# Fetch the /detail record for every CAS RN, keeping results in input order
casrn_data = []
for casrn in casrn_list:
    # `params` lets requests build the query string; the timeout keeps a
    # stalled connection from hanging the loop
    response = requests.get(detail_base_url, params={"cas_rn": casrn}, timeout=30)
    # Stop on an HTTP error rather than storing an error body as a record
    response.raise_for_status()
    casrn_data.append(response.json())
    sleep(1)  # add a delay between API calls
casrn_data[0:2]  # view first 2
[{'canonicalSmile': 'O=C(OCC)CCC1CCCCC1',
  'experimentalProperties': [{'name': 'Boiling Point',
    'property': '105-113 °C @ Press: 17 Torr',
    'sourceNumber': 1}],
  'hasMolfile': True,
  'image': '<svg width="228.6" viewBox="0 0 7620 3716" text-rendering="auto" stroke-width="1" stroke-opacity="1" stroke-miterlimit="10" stroke-linejoin="miter" stroke-linecap="square" stroke-dashoffset="0" stroke-dasharray="none" stroke="black" shape-rendering="auto" image-rendering="auto" height="111.48" font-weight="normal" font-style="normal" font-size="12" font-family="\'Dialog\'" fill-opacity="1" fill="black" color-rendering="auto" color-interpolation="auto" xmlns="http://www.w3.org/2000/svg"><g><g stroke="white" fill="white"><rect y="0" x="0" width="7620" stroke="none" height="3716"/></g><g transform="translate(32866,32758)" text-rendering="geometricPrecision" stroke-width="44" stroke-linejoin="round" stroke-linecap="round"><line y2="-30850" y1="-31419" x2="-30792" x1="-31777" fill="none"/><line y2="-29715" y1="-30850" x2="-30792" x1="-30792" fill="none"/><line y2="-31419" y1="-30850" x2="-31777" x1="-32762" fill="none"/><line y2="-29146" y1="-29715" x2="-31777" x1="-30792" fill="none"/><line y2="-30850" y1="-29715" x2="-32762" x1="-32762" fill="none"/><line y2="-29715" y1="-29146" x2="-32762" x1="-31777" fill="none"/><line y2="-31376" y1="-30850" x2="-29885" x1="-30792" fill="none"/><line y2="-30850" y1="-31376" x2="-28978" x1="-29885" fill="none"/><line y2="-31376" y1="-30850" x2="-28071" x1="-28978" fill="none"/><line y2="-30960" y1="-31376" x2="-27352" x1="-28071" fill="none"/><line y2="-31376" y1="-30960" x2="-26257" x1="-26976" fill="none"/><line y2="-30850" y1="-31376" x2="-25350" x1="-26257" fill="none"/><line y2="-32202" y1="-31376" x2="-28140" x1="-28140" fill="none"/><line y2="-32202" y1="-31376" x2="-28002" x1="-28002" fill="none"/><text y="-30671" xml:space="preserve" x="-27317" stroke="none" font-size="433.3333" font-family="sans-serif">O</text><text y="-32242" xml:space="preserve" x="-28224" stroke="none" font-size="433.3333" font-family="sans-serif">O</text></g></g></svg>',
  'inchi': 'InChI=1S/C11H20O2/c1-2-13-11(12)9-8-10-6-4-3-5-7-10/h10H,2-9H2,1H3',
  'inchiKey': 'InChIKey=NRVPMFHPHGBQLP-UHFFFAOYSA-N',
  'molecularFormula': 'C<sub>11</sub>H<sub>20</sub>O<sub>2</sub>',
  'molecularMass': '184.28',
  'name': 'Ethyl cyclohexanepropionate',
  'propertyCitations': [{'docUri': 'document/pt/document/22252593',
    'source': 'De Benneville, Peter L.; Journal of the American Chemical Society, (1940), 62, 283-7, CAplus',
    'sourceNumber': 1}],
  'replacedRns': [],
  'rn': '10094-36-7',
  'smile': 'C(CC(OCC)=O)C1CCCCC1',
  'synonyms': ['Cyclohexanepropanoic acid, ethyl ester',
   'Cyclohexanepropionic acid, ethyl ester',
   'Ethyl cyclohexanepropionate',
   'Ethyl cyclohexylpropanoate',
   'Ethyl 3-cyclohexylpropionate',
   'Ethyl 3-cyclohexylpropanoate',
   '3-Cyclohexylpropionic acid ethyl ester',
   'NSC 71463',
   'Ethyl 3-cyclohexanepropionate'],
  'uri': 'substance/pt/10094367'},
 {'canonicalSmile': 'O=C(C#CCCCCCC)OCC',
  'experimentalProperties': [],
  'hasMolfile': True,
  'image': '<svg width="318.24" viewBox="0 0 10608 2283" text-rendering="auto" stroke-width="1" stroke-opacity="1" stroke-miterlimit="10" stroke-linejoin="miter" stroke-linecap="square" stroke-dashoffset="0" stroke-dasharray="none" stroke="black" shape-rendering="auto" image-rendering="auto" height="68.49" font-weight="normal" font-style="normal" font-size="12" font-family="\'Dialog\'" fill-opacity="1" fill="black" color-rendering="auto" color-interpolation="auto" xmlns="http://www.w3.org/2000/svg"><g><g stroke="white" fill="white"><rect y="0" x="0" width="10608" stroke="none" height="2283"/></g><g transform="translate(32866,32758)" text-rendering="geometricPrecision" stroke-width="44" stroke-linejoin="round" stroke-linecap="round"><line y2="-31899" y1="-31899" x2="-26132" x1="-27178" fill="none"/><line y2="-31988" y1="-31988" x2="-26132" x1="-27178" fill="none"/><line y2="-31809" y1="-31809" x2="-26132" x1="-27178" fill="none"/><line y2="-31899" y1="-31899" x2="-28227" x1="-27178" fill="none"/><line y2="-31376" y1="-31899" x2="-29134" x1="-28227" fill="none"/><line y2="-31899" y1="-31376" x2="-30041" x1="-29134" fill="none"/><line y2="-31376" y1="-31899" x2="-30948" x1="-30041" fill="none"/><line y2="-31899" y1="-31376" x2="-31855" x1="-30948" fill="none"/><line y2="-31376" y1="-31899" x2="-32762" x1="-31855" fill="none"/><line y2="-31899" y1="-31899" x2="-25084" x1="-26132" fill="none"/><line y2="-32315" y1="-31899" x2="-24364" x1="-25084" fill="none"/><line y2="-31899" y1="-32315" x2="-23270" x1="-23989" fill="none"/><line y2="-32422" y1="-31899" x2="-22362" x1="-23270" fill="none"/><line y2="-31070" y1="-31899" x2="-25014" x1="-25014" fill="none"/><line y2="-31070" y1="-31899" x2="-25153" x1="-25153" fill="none"/><text y="-32242" xml:space="preserve" x="-24330" stroke="none" font-size="433.3333" font-family="sans-serif">O</text><text y="-30671" xml:space="preserve" x="-25237" stroke="none" font-size="433.3333" font-family="sans-serif">O</text></g></g></svg>',
  'inchi': 'InChI=1S/C11H18O2/c1-3-5-6-7-8-9-10-11(12)13-4-2/h3-8H2,1-2H3',
  'inchiKey': 'InChIKey=BFZNMUGAZYAMTG-UHFFFAOYSA-N',
  'molecularFormula': 'C<sub>11</sub>H<sub>18</sub>O<sub>2</sub>',
  'molecularMass': '182.26',
  'name': 'Ethyl 2-nonynoate',
  'propertyCitations': [],
  'replacedRns': [],
  'rn': '10031-92-2',
  'smile': 'C(C#CCCCCCC)(OCC)=O',
  'synonyms': ['2-Nonynoic acid, ethyl ester',
   'Ethyl 2-nonynoate',
   'NSC 190985'],
  'uri': 'substance/pt/10031922'}]

Display Molecule Drawings#

# `display` is imported explicitly so the cell does not rely on IPython
# injecting it into the namespace
from IPython.display import SVG, display

# get svg image text for every record
svg_strings = [record["image"] for record in casrn_data]

# display the molecules, one drawing per record
for svg_string in svg_strings:
    display(SVG(svg_string))
../../_images/a65670381e4045b19ed2ec31282b93988bd81cbbb23b82ca3a1b99c8d51d5af7.svg ../../_images/99360c498c85017d230124e0814f7361c708211831278820f945ff07029f9070.svg ../../_images/710ccf464bfa46cecce1290c78f3fb14431f6deeb6ff5d7d5f2e93af85c58673.svg ../../_images/8a695350fcafcb8983d23facab2dd22d45a80b6abcd71be91745a57be950b44e.svg ../../_images/27e5fabbf9df8c217ac0788366aad6659f190f9430271065ac753bb1785e15aa.svg

Select some specific data#

# Get canonical SMILES for every record
# (iterate over the records directly instead of indexing with range(len(...)))
cansmiles = [record["canonicalSmile"] for record in casrn_data]
print(cansmiles)
['O=C(OCC)CCC1CCCCC1', 'O=C(C#CCCCCCC)OCC', 'O=C(OCC)CN1N=CC=C1', 'O=C(OCC)C1=CC=CC(=C1)CCC(=O)OCC', 'N=C(OCC)C1=CCCCC1']
# Get synonyms: one list of synonym strings per record, in record order
# (iterate over the records directly instead of indexing with range(len(...)))
synonyms_list = [record["synonyms"] for record in casrn_data]
pprint(synonyms_list)
[['Cyclohexanepropanoic acid, ethyl ester',
  'Cyclohexanepropionic acid, ethyl ester',
  'Ethyl cyclohexanepropionate',
  'Ethyl cyclohexylpropanoate',
  'Ethyl 3-cyclohexylpropionate',
  'Ethyl 3-cyclohexylpropanoate',
  '3-Cyclohexylpropionic acid ethyl ester',
  'NSC 71463',
  'Ethyl 3-cyclohexanepropionate'],
 ['2-Nonynoic acid, ethyl ester', 'Ethyl 2-nonynoate', 'NSC 190985'],
 ['1<em>H</em>-Pyrazole-1-acetic acid, ethyl ester',
  'Pyrazole-1-acetic acid, ethyl ester',
  'Ethyl 1<em>H</em>-pyrazole-1-acetate',
  'Ethyl 1-pyrazoleacetate',
  'Ethyl 2-(1<em>H</em>-pyrazol-1-yl)acetate'],
 ['Benzenepropanoic acid, 3-(ethoxycarbonyl)-, ethyl ester',
  'Hydrocinnamic acid, <em>m</em>-carboxy-, diethyl ester',
  'Ethyl 3-(ethoxycarbonyl)benzenepropanoate'],
 ['1-Cyclohexene-1-carboximidic acid, ethyl ester',
  'Ethyl 1-cyclohexene-1-carboximidate']]
# Flatten the per-substance synonym lists into one list, preserving order
synonyms_flat = [name for names in synonyms_list for name in names]
pprint(synonyms_flat)
['Cyclohexanepropanoic acid, ethyl ester',
 'Cyclohexanepropionic acid, ethyl ester',
 'Ethyl cyclohexanepropionate',
 'Ethyl cyclohexylpropanoate',
 'Ethyl 3-cyclohexylpropionate',
 'Ethyl 3-cyclohexylpropanoate',
 '3-Cyclohexylpropionic acid ethyl ester',
 'NSC 71463',
 'Ethyl 3-cyclohexanepropionate',
 '2-Nonynoic acid, ethyl ester',
 'Ethyl 2-nonynoate',
 'NSC 190985',
 '1<em>H</em>-Pyrazole-1-acetic acid, ethyl ester',
 'Pyrazole-1-acetic acid, ethyl ester',
 'Ethyl 1<em>H</em>-pyrazole-1-acetate',
 'Ethyl 1-pyrazoleacetate',
 'Ethyl 2-(1<em>H</em>-pyrazol-1-yl)acetate',
 'Benzenepropanoic acid, 3-(ethoxycarbonyl)-, ethyl ester',
 'Hydrocinnamic acid, <em>m</em>-carboxy-, diethyl ester',
 'Ethyl 3-(ethoxycarbonyl)benzenepropanoate',
 '1-Cyclohexene-1-carboximidic acid, ethyl ester',
 'Ethyl 1-cyclohexene-1-carboximidate']

Create a dataset with Pandas#

import numpy as np
import pandas as pd

# Flatten the list of record dicts into a table, one row per record
df = pd.json_normalize(casrn_data)

# Keep only the identifier and descriptor columns of interest
subset_columns = ["uri", "rn", "name", "inchiKey", "canonicalSmile", "molecularMass"]
df_subset = df[subset_columns]