From 233cf3bfe70c5abe0b2c3ab4a734777e53dc198a Mon Sep 17 00:00:00 2001 From: Matt Swain Date: Fri, 8 Aug 2025 11:07:03 -0400 Subject: [PATCH 1/5] Docstring corrections --- pubchempy.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pubchempy.py b/pubchempy.py index 53fcf94..72c053b 100644 --- a/pubchempy.py +++ b/pubchempy.py @@ -601,12 +601,11 @@ def get_properties( as_dataframe: bool = False, **kwargs: QueryParam, ) -> list[dict[str, t.Any]] | pd.DataFrame: - """Retrieve the specified properties from PubChem. + """Retrieve the specified compound properties from PubChem. Args: properties: The properties to retrieve. - identifier: The compound, substance or assay identifier to use as a search - query. + identifier: The compound identifier to use as a search query. namespace: The identifier type. searchtype: The advanced search type, one of substructure, superstructure or similarity. @@ -643,7 +642,7 @@ def get_synonyms( Args: identifier: The identifier to use as a search query. namespace: The identifier type (e.g., cid, name, smiles for compounds). - domain: The PubChem domain to search (compound, substance, or assay). + domain: The PubChem domain to search (compound or substance). searchtype: The advanced search type, one of substructure, superstructure or similarity. **kwargs: Additional parameters to pass to the request. 
From 5eff29263237a69f6fbab45b33abf11050aa25c1 Mon Sep 17 00:00:00 2001 From: Matt Swain Date: Fri, 8 Aug 2025 11:08:09 -0400 Subject: [PATCH 2/5] Add PUG REST page to the docs --- docs/guide/introduction.md | 9 +------- docs/guide/pugrest.md | 44 ++++++++++++++++++++++++++++++++++++++ docs/index.md | 1 + 3 files changed, 46 insertions(+), 8 deletions(-) create mode 100644 docs/guide/pugrest.md diff --git a/docs/guide/introduction.md b/docs/guide/introduction.md index bc0fec9..5abcea5 100644 --- a/docs/guide/introduction.md +++ b/docs/guide/introduction.md @@ -8,12 +8,7 @@ PubChemPy relies entirely on the PubChem database and chemical toolkits provided This is important to remember when using PubChemPy: Every request you make is transmitted to the PubChem servers, evaluated, and then a response is sent back. There are some downsides to this: It is less suitable for confidential work, it requires a constant internet connection, and some tasks will be slower than if they were performed locally on your own computer. On the other hand, this means we have the vast resources of the PubChem database and chemical toolkits at our disposal. As a result, it is possible to do complex similarity and substructure searching against a database containing tens of millions of compounds in seconds, without needing any of the storage space or computational power on your own local computer. -## The PUG REST web service - -You don't need to worry too much about how the PubChem web service works, because PubChemPy handles all of the details for you. But if you want to go beyond the capabilities of PubChemPy, there is some helpful documentation on the PubChem website. - -- [PUG REST Tutorial]: Explains how the web service works with a variety of usage examples. -- [PUG REST Specification]: A more comprehensive but dense specification that details every possible way to use the web service. +See the {doc}`pugrest` page for more information about how PubChemPy uses the PubChem web service. 
## PubChemPy license @@ -27,6 +22,4 @@ You don't need to worry too much about how the PubChem web service works, becaus [^f1]: That's a lot of acronyms! PUG stands for "Power User Gateway", a term used to describe a variety of methods for programmatic access to PubChem data and services. REST stands for [Representational State Transfer], which describes the specific architectural style of the web service. [pubchem website]: https://pubchem.ncbi.nlm.nih.gov -[pug rest specification]: https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest -[pug rest tutorial]: https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest-tutorial [representational state transfer]: https://en.wikipedia.org/wiki/Representational_state_transfer diff --git a/docs/guide/pugrest.md b/docs/guide/pugrest.md new file mode 100644 index 0000000..ccbe349 --- /dev/null +++ b/docs/guide/pugrest.md @@ -0,0 +1,44 @@ +(pugrest)= + +# PUG REST + +PUG (Power User Gateway) REST is a web service that PubChem provides for programmatic access to its data. PubChemPy uses this web service to interact with the PubChem database, allowing you to search for compounds, substances, and assays, retrieve their properties, and perform various operations without needing to download or store large datasets locally. + +You don't need to worry too much about how the PubChem web service works, because PubChemPy handles all of the details for you. But understanding the underlying architecture can help you use PubChemPy more effectively and troubleshoot issues. + +## PUG REST Architecture + +The PUG REST API is built around a three-part request pattern: + +1. **Input**: Specifies which records you're interested in (by CID, name, SMILES, etc.) +2. **Operation**: Defines what to do with those records (retrieve properties, search, etc.) +3. **Output**: Determines the format of the returned data (JSON, XML, CSV, etc.) + +This modular design allows for flexible combinations. 
For example, you can combine structure input via SMILES with property retrieval operations and CSV output - all handled seamlessly by PubChemPy. + +## Request Flow + +When you make a request with PubChemPy: + +1. Your Python request is translated into a PUG REST URL (and possibly some POST data). +2. The request is sent to PubChem's servers via HTTPS. +3. PubChem processes the request using their chemical databases and toolkits. +4. Results are returned and parsed by PubChemPy into Python objects. + +PubChem contains over 300 million substance records, over 100 million standardized compound records, and over 1 million biological assays. All this data may be accessed and processed through PubChemPy without requiring local storage or computational resources. + +## When to Use Alternatives + +While PubChemPy and PUG REST are excellent for many tasks, consider alternatives for: + +- **Bulk data processing**: Use PubChem's bulk download services for large datasets +- **Confidential work**: Consider local chemical toolkits for sensitive data +- **Offline work**: The PUG REST API requires an internet connection + +## Further Reading + +If you want to go beyond the capabilities of PubChemPy, there is helpful documentation about programmatic access to PubChem data on the PubChem website: + +- [Programmatic Access to PubChem](https://pubchem.ncbi.nlm.nih.gov/docs/programmatic-access): Overview of how to access PubChem data programmatically. +- [PUG REST Tutorial](https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest-tutorial): Explains how the web service works with a variety of usage examples. +- [PUG REST Specification](https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest): A more comprehensive but dense specification that details every possible way to use the web service. 
diff --git a/docs/index.md b/docs/index.md index 160f4b6..3e26b80 100644 --- a/docs/index.md +++ b/docs/index.md @@ -68,6 +68,7 @@ guide/properties guide/pandas guide/download guide/advanced +guide/pugrest guide/contribute ``` From 36cabfd59d608453e1bd10f731ac8bf489de4ef0 Mon Sep 17 00:00:00 2001 From: Matt Swain Date: Fri, 8 Aug 2025 11:16:17 -0400 Subject: [PATCH 3/5] Docs tweaks --- docs/guide/advanced.md | 2 +- docs/guide/compound.md | 10 ++++++++-- docs/guide/contribute.md | 2 +- docs/guide/pandas.md | 8 +++----- docs/guide/pugrest.md | 8 ++++---- docs/guide/searching.md | 12 ++---------- docs/guide/substance.md | 2 +- 7 files changed, 20 insertions(+), 24 deletions(-) diff --git a/docs/guide/advanced.md b/docs/guide/advanced.md index e7a9da6..0a7507b 100644 --- a/docs/guide/advanced.md +++ b/docs/guide/advanced.md @@ -1,6 +1,6 @@ (advanced)= -# Advanced Usage +# Advanced usage This guide covers advanced PubChemPy usage patterns, API best practices, error handling, logging, and low-level request functions. diff --git a/docs/guide/compound.md b/docs/guide/compound.md index dee8456..6ec1c22 100644 --- a/docs/guide/compound.md +++ b/docs/guide/compound.md @@ -1,6 +1,6 @@ (compound)= -# Compound +# Compounds The {func}`~pubchempy.get_compounds` function returns a list of {class}`~pubchempy.Compound` objects. You can also instantiate a {class}`~pubchempy.Compound` object directly if you know its CID: @@ -25,7 +25,13 @@ Additionally, each {class}`~pubchempy.Compound` provides a {meth}`~pubchempy.Com 'inchi': u'InChI=1S/H2O/h1H2'} ``` -## 3D Compounds +## 3D compounds + +By default, compounds are returned with 2D coordinates. 
Use the `record_type` keyword argument to specify otherwise: + +```python +pcp.get_compounds('Aspirin', 'name', record_type='3d') +``` Many properties are missing from 3D records, and the following properties are *only* available on 3D records: diff --git a/docs/guide/contribute.md b/docs/guide/contribute.md index 3fe682e..41bc9ac 100644 --- a/docs/guide/contribute.md +++ b/docs/guide/contribute.md @@ -1,6 +1,6 @@ (contribute)= -# Contribute +# Contributing The [Issue Tracker] is the best place to post any feature ideas, requests and bug reports. diff --git a/docs/guide/pandas.md b/docs/guide/pandas.md index c492ca2..fb61539 100644 --- a/docs/guide/pandas.md +++ b/docs/guide/pandas.md @@ -2,15 +2,15 @@ # *pandas* integration -## Getting *pandas* +## Installing *pandas* -*pandas* must be installed to use its functionality from within PubChemPy. The easiest way is to use pip: +*pandas* must be installed to use its functionality from within PubChemPy. It is an optional dependency, so it is not installed automatically with PubChemPy. The easiest way is to use pip: ```bash pip install pandas ``` -See the [pandas documentation] for more information. +See the [pandas documentation](https://pandas.pydata.org/pandas-docs/stable/) for more information. ## Usage @@ -28,5 +28,3 @@ An existing list of {class}`~pubchempy.Compound` objects can be converted into a cs = pcp.get_compounds('C20H41Br', 'formula') df4 = pcp.compounds_to_frame(cs, properties=['smiles', 'xlogp', 'rotatable_bond_count']) ``` - -[pandas documentation]: https://pandas.pydata.org/pandas-docs/stable/ diff --git a/docs/guide/pugrest.md b/docs/guide/pugrest.md index ccbe349..4e932a3 100644 --- a/docs/guide/pugrest.md +++ b/docs/guide/pugrest.md @@ -6,7 +6,7 @@ PUG (Power User Gateway) REST is a web service that PubChem provides for program You don't need to worry too much about how the PubChem web service works, because PubChemPy handles all of the details for you. 
But understanding the underlying architecture can help you use PubChemPy more effectively and troubleshoot issues. -## PUG REST Architecture +## PUG REST architecture The PUG REST API is built around a three-part request pattern: @@ -16,7 +16,7 @@ The PUG REST API is built around a three-part request pattern: This modular design allows for flexible combinations. For example, you can combine structure input via SMILES with property retrieval operations and CSV output - all handled seamlessly by PubChemPy. -## Request Flow +## Request flow When you make a request with PubChemPy: @@ -27,7 +27,7 @@ When you make a request with PubChemPy: PubChem contains over 300 million substance records, over 100 million standardized compound records, and over 1 million biological assays. All this data may be accessed and processed through PubChemPy without requiring local storage or computational resources. -## When to Use Alternatives +## When to use alternatives While PubChemPy and PUG REST are excellent for many tasks, consider alternatives for: @@ -35,7 +35,7 @@ While PubChemPy and PUG REST are excellent for many tasks, consider alternatives - **Confidential work**: Consider local chemical toolkits for sensitive data - **Offline work**: The PUG REST API requires an internet connection -## Further Reading +## Further reading If you want to go beyond the capabilities of PubChemPy, there is helpful documentation about programmatic access to PubChem data on the PubChem website: diff --git a/docs/guide/searching.md b/docs/guide/searching.md index 4c4654a..e4cac02 100644 --- a/docs/guide/searching.md +++ b/docs/guide/searching.md @@ -2,17 +2,9 @@ # Searching -## 2D and 3D coordinates +PubChemPy provides powerful search capabilities that leverage PubChem's extensive chemical databases. Understanding the different search types and their performance characteristics can help you choose the most efficient approach for your needs. -By default, compounds are returned with 2D coordinates. 
Use the `record_type` keyword argument to specify otherwise: - -```python -pcp.get_compounds('Aspirin', 'name', record_type='3d') -``` - -## Advanced search types - -By default, requests look for an exact match with the input. Alternatively, you can specify substructure, superstructure, similarity and identity searches using the `searchtype` keyword argument: +By default, requests look for an exact match with the input. Alternatively, you can specify a search type using the `searchtype` parameter to perform chemical substructure, superstructure, similarity, or identity searches. ```python pcp.get_compounds('CC', 'smiles', searchtype='superstructure', listkey_count=3) diff --git a/docs/guide/substance.md b/docs/guide/substance.md index 7b0cb1c..913e549 100644 --- a/docs/guide/substance.md +++ b/docs/guide/substance.md @@ -1,6 +1,6 @@ (substance)= -# Substance +# Substances The PubChem Substance database contains all chemical records deposited in PubChem in their most raw form, before any significant processing is applied. As a result, it contains duplicates, mixtures, and some records that don't make chemical sense. This means that {class}`~pubchempy.Substance` records contain fewer calculated properties, however they do have additional information about the original source that deposited the record. 
From c47eb6b559e15fc854c1f7eab699c3ce917bb956 Mon Sep 17 00:00:00 2001 From: Matt Swain Date: Fri, 8 Aug 2025 12:05:48 -0400 Subject: [PATCH 4/5] docs code blocks --- README.md | 2 +- docs/guide/advanced.md | 10 +++++----- docs/guide/compound.md | 6 +++--- docs/guide/download.md | 4 ++-- docs/guide/gettingstarted.md | 18 +++++++++--------- docs/guide/pandas.md | 8 ++++---- docs/guide/properties.md | 16 ++++++++-------- docs/guide/searching.md | 6 +++--- docs/guide/substance.md | 6 +++--- docs/index.md | 6 +++--- 10 files changed, 41 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index 288f679..f56d04a 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ PubChemPy provides a way to interact with PubChem in Python. It allows chemical searches by name, substructure and similarity, chemical standardization, conversion between chemical file formats, depiction and retrieval of chemical properties. -```python +```pycon >>> from pubchempy import get_compounds, Compound >>> comp = Compound.from_cid(1423) >>> print(comp.smiles) diff --git a/docs/guide/advanced.md b/docs/guide/advanced.md index 0a7507b..47786fc 100644 --- a/docs/guide/advanced.md +++ b/docs/guide/advanced.md @@ -13,7 +13,7 @@ If there are too many results for a request, you will receive a TimeoutError. Th If retrieving full compound or substance records, instead request a list of cids or sids for your input, and then request the full records for those identifiers individually or in small groups. For example: ```python -sids = get_sids('Aspirin', 'name') +sids = get_sids("Aspirin", "name") for sid in sids: s = Substance.from_sid(sid) ``` @@ -21,8 +21,8 @@ for sid in sids: When using the `formula` namespace or a `searchtype`, you can also alternatively use the `listkey_count` and `listkey_start` keyword arguments to specify pagination. The `listkey_count` value specifies the number of results per page, and the `listkey_start` value specifies which page to return. 
For example: ```python -get_compounds('CC', 'smiles', searchtype='substructure', listkey_count=5) -get('C10H21N', 'formula', listkey_count=3, listkey_start=6) +get_compounds("CC", "smiles", searchtype="substructure", listkey_count=5) +get("C10H21N", "formula", listkey_count=3, listkey_start=6) ``` ## Logging @@ -61,8 +61,8 @@ A simple fix is to specify the proxy information via urllib: ```python import urllib proxy_support = urllib.request.ProxyHandler({ - 'http': 'http://:', - 'https': 'https://:' + "http": "http://:", + "https": "https://:" }) opener = urllib.request.build_opener(proxy_support) urllib.request.install_opener(opener) diff --git a/docs/guide/compound.md b/docs/guide/compound.md index 6ec1c22..2c4b56c 100644 --- a/docs/guide/compound.md +++ b/docs/guide/compound.md @@ -14,9 +14,9 @@ Each {class}`~pubchempy.Compound` has a `record` property, which is a dictionary Additionally, each {class}`~pubchempy.Compound` provides a {meth}`~pubchempy.Compound.to_dict` method that returns PubChemPy's own dictionary representation of the Compound data. As well as being more concisely formatted than the raw `record`, this method also takes an optional parameter to filter the list of the desired properties: -```python +```pycon >>> c = pcp.Compound.from_cid(962) ->>> c.to_dict(properties=['atoms', 'bonds', 'inchi']) +>>> c.to_dict(properties=["atoms", "bonds", "inchi"]) {'atoms': [{'aid': 1, 'element': 'o', 'x': 2.5369, 'y': -0.155}, {'aid': 2, 'element': 'h', 'x': 3.0739, 'y': 0.155}, {'aid': 3, 'element': 'h', 'x': 2, 'y': 0.155}], @@ -30,7 +30,7 @@ Additionally, each {class}`~pubchempy.Compound` provides a {meth}`~pubchempy.Com By default, compounds are returned with 2D coordinates. 
Use the `record_type` keyword argument to specify otherwise: ```python -pcp.get_compounds('Aspirin', 'name', record_type='3d') +pcp.get_compounds("Aspirin", "name", record_type="3d") ``` Many properties are missing from 3D records, and the following properties are *only* available on 3D records: diff --git a/docs/guide/download.md b/docs/guide/download.md index 0b706af..1d8db89 100644 --- a/docs/guide/download.md +++ b/docs/guide/download.md @@ -7,8 +7,8 @@ The {func}`~pubchempy.download` function is for saving a file to disk. The follo Examples: ```python -pcp.download('PNG', 'asp.png', 'Aspirin', 'name') -pcp.download('CSV', 's.csv', [1,2,3], operation='property/ConnectivitySMILES,SMILES') +pcp.download("PNG", "asp.png", "Aspirin", "name") +pcp.download("CSV", "s.csv", [1,2,3], operation="property/ConnectivitySMILES,SMILES") ``` For PNG images, the `image_size` argument can be used to specify `large`, `small` diff --git a/docs/guide/gettingstarted.md b/docs/guide/gettingstarted.md index b61fe8f..3a66971 100644 --- a/docs/guide/gettingstarted.md +++ b/docs/guide/gettingstarted.md @@ -10,19 +10,19 @@ Retrieving information about a specific Compound in the PubChem database is simp Begin by importing PubChemPy: -```python +```pycon >>> import pubchempy as pcp ``` Let's get the {class}`~pubchempy.Compound` with [CID 5090]: -```python +```pycon >>> c = pcp.Compound.from_cid(5090) ``` Now we have a {class}`~pubchempy.Compound` object called `c`. 
We can get all the information we need from this object: -```python +```pycon >>> print(c.molecular_formula) C17H14O4S >>> print(c.molecular_weight) @@ -43,7 +43,7 @@ All the code examples in this documentation will assume you have imported PubChe ```python from pubchempy import Compound, get_compounds c = Compound.from_cid(1423) -cs = get_compounds('Aspirin', 'name') +cs = get_compounds("Aspirin", "name") ``` ```` @@ -51,15 +51,15 @@ cs = get_compounds('Aspirin', 'name') What if you don't know the PubChem CID of the Compound you want? Just use the {func}`~pubchempy.get_compounds` function: -```python ->>> results = pcp.get_compounds('Glucose', 'name') +```pycon +>>> results = pcp.get_compounds("Glucose", "name") >>> print(results) [Compound(5793)] ``` The first argument is the identifier, and the second argument is the identifier type, which must be one of `name`, `smiles`, `sdf`, `inchi`, `inchikey` or `formula`. It looks like there are 4 compounds in the PubChem Database that have the name Glucose associated with them. Let's take a look at them in more detail: -```python +```pycon >>> for compound in results: ... print(compound.smiles) C([C@@H]1[C@H]([C@@H]([C@H](C(O1)O)O)O)O)O @@ -69,8 +69,8 @@ It looks like they all have different stereochemistry information. 
Retrieving the record for a SMILES string is just as easy: -```python ->>> pcp.get_compounds('C1=CC2=C(C3=C(C=CC=N3)C=C2)N=C1', 'smiles') +```pycon +>>> pcp.get_compounds("C1=CC2=C(C3=C(C=CC=N3)C=C2)N=C1", "smiles") [Compound(1318)] ``` diff --git a/docs/guide/pandas.md b/docs/guide/pandas.md index fb61539..3519df8 100644 --- a/docs/guide/pandas.md +++ b/docs/guide/pandas.md @@ -17,14 +17,14 @@ See the [pandas documentation](https://pandas.pydata.org/pandas-docs/stable/) fo It is possible for {func}`~pubchempy.get_compounds`, {func}`~pubchempy.get_substances` and {func}`~pubchempy.get_properties` to return a pandas DataFrame: ```python -df1 = pcp.get_compounds('C20H41Br', 'formula', as_dataframe=True) +df1 = pcp.get_compounds("C20H41Br", "formula", as_dataframe=True) df2 = pcp.get_substances([1, 2, 3, 4], as_dataframe=True) -df3 = pcp.get_properties(['smiles', 'xlogp', 'rotatable_bond_count'], 'C20H41Br', 'formula', as_dataframe=True) +df3 = pcp.get_properties(["smiles", "xlogp", "rotatable_bond_count"], "C20H41Br", "formula", as_dataframe=True) ``` An existing list of {class}`~pubchempy.Compound` objects can be converted into a dataframe, optionally specifying the desired columns: ```python -cs = pcp.get_compounds('C20H41Br', 'formula') -df4 = pcp.compounds_to_frame(cs, properties=['smiles', 'xlogp', 'rotatable_bond_count']) +cs = pcp.get_compounds("C20H41Br", "formula") +df4 = pcp.compounds_to_frame(cs, properties=["smiles", "xlogp", "rotatable_bond_count"]) ``` diff --git a/docs/guide/properties.md b/docs/guide/properties.md index 0d187a8..e9bae2c 100644 --- a/docs/guide/properties.md +++ b/docs/guide/properties.md @@ -5,7 +5,7 @@ The {func}`~pubchempy.get_properties` function allows the retrieval of specific properties without having to deal with entire compound records. 
This is especially useful for retrieving the properties of a large number of compounds at once: ```python -p = pcp.get_properties('SMILES', 'CC', 'smiles', searchtype='superstructure') +p = pcp.get_properties("SMILES", "CC", "smiles", searchtype="superstructure") ``` Multiple properties may be specified in a list, or in a comma-separated string. The available properties are: MolecularFormula, MolecularWeight, ConnectivitySMILES, SMILES, InChI, InChIKey, IUPACName, XLogP, ExactMass, MonoisotopicMass, TPSA, Complexity, Charge, HBondDonorCount, HBondAcceptorCount, RotatableBondCount, HeavyAtomCount, IsotopeAtomCount, AtomStereoCount, DefinedAtomStereoCount, UndefinedAtomStereoCount, BondStereoCount, DefinedBondStereoCount, UndefinedBondStereoCount, CovalentUnitCount, Volume3D, XStericQuadrupole3D, YStericQuadrupole3D, ZStericQuadrupole3D, FeatureCount3D, FeatureAcceptorCount3D, FeatureDonorCount3D, FeatureAnionCount3D, FeatureCationCount3D, FeatureRingCount3D, FeatureHydrophobeCount3D, ConformerModelRMSD3D, EffectiveRotorCount3D, ConformerCount3D. @@ -15,8 +15,8 @@ Multiple properties may be specified in a list, or in a comma-separated string. Get a list of synonyms for a given input using the {func}`~pubchempy.get_synonyms` function: ```python -pcp.get_synonyms('Aspirin', 'name') -pcp.get_synonyms('Aspirin', 'name', 'substance') +pcp.get_synonyms("Aspirin", "name") +pcp.get_synonyms("Aspirin", "name", "substance") ``` Inputs that match more than one SID/CID will have multiple, separate synonyms lists returned. @@ -26,14 +26,14 @@ Inputs that match more than one SID/CID will have multiple, separate synonyms li CAS Registry Numbers are not officially supported by PubChem, but they are often present in the synonyms associated with a compound. 
Therefore it is straightforward to retrieve them by filtering the synonyms to just those with the CAS Registry Number format: ```python -for result in pcp.get_synonyms('Aspirin', 'name'): - cid = result['CID'] +for result in pcp.get_synonyms("Aspirin", "name"): + cid = result["CID"] cas_rns = [] - for syn in result.get('Synonym', []): - match = re.match(r'(\d{2,7}-\d\d-\d)', syn) + for syn in result.get("Synonym", []): + match = re.match(r"(\d{2,7}-\d\d-\d)", syn) if match: cas_rns.append(match.group(1)) - print(f'CAS registry numbers for CID {cid}: {cas_rns}') + print(f"CAS registry numbers for CID {cid}: {cas_rns}") ``` ## Identifiers diff --git a/docs/guide/searching.md b/docs/guide/searching.md index e4cac02..a519883 100644 --- a/docs/guide/searching.md +++ b/docs/guide/searching.md @@ -7,7 +7,7 @@ PubChemPy provides powerful search capabilities that leverage PubChem's extensiv By default, requests look for an exact match with the input. Alternatively, you can specify a search type using the `searchtype` parameter to perform chemical substructure, superstructure, similarity, or identity searches. ```python -pcp.get_compounds('CC', 'smiles', searchtype='superstructure', listkey_count=3) +pcp.get_compounds("CC", "smiles", searchtype="superstructure", listkey_count=3) ``` The `listkey_count` and `listkey_start` arguments can be used for pagination. Each `searchtype` has its own options that can be specified as keyword arguments. For example, similarity searches have a `Threshold`, and super/substructure searches have `MatchIsotopes`. A full list of options is available in the [PUG REST Specification]. 
@@ -23,7 +23,7 @@ Unfortunately it isn't directly possible to return to the previous behaviour, bu There area a few different ways you can do this using PubChemPy, but the easiest is probably using the {func}`~pubchempy.get_cids` function: > ```pycon -> >>> pcp.get_cids('2-nonenal', 'name', 'substance', list_return='flat') +> >>> pcp.get_cids("2-nonenal", "name", "substance", list_return="flat") > [17166, 5283335, 5354833] > ``` @@ -32,7 +32,7 @@ This searches the substance database for '2-nonenal', and gets the CID for the c You can then use {meth}`~pubchempy.Compound.from_cid` to get the full {class}`~pubchempy.Compound` record, equivalent to what is returned by {func}`~pubchempy.get_compounds`: > ```pycon -> >>> cids = pcp.get_cids('2-nonenal', 'name', 'substance', list_return='flat') +> >>> cids = pcp.get_cids("2-nonenal", "name", "substance", list_return="flat") > >>> [pcp.Compound.from_cid(cid) for cid in cids] > [Compound(17166), Compound(5283335), Compound(5354833)] > ``` diff --git a/docs/guide/substance.md b/docs/guide/substance.md index 913e549..aa920ff 100644 --- a/docs/guide/substance.md +++ b/docs/guide/substance.md @@ -10,15 +10,15 @@ The PubChem Compound database is constructed from the Substance database using a Retrieve Substances using the {func}`~pubchempy.get_substances` function: -```python ->>> results = pcp.get_substances('Coumarin 343', 'name') +```pycon +>>> results = pcp.get_substances("Coumarin 343", "name") >>> print(results) [Substance(24864499), Substance(85084977), Substance(126686397), Substance(143491255), Substance(152243230), Substance(162092514), Substance(162189467), Substance(186021999), Substance(206257050), ... 
] ``` You can also instantiate a {class}`~pubchempy.Substance` directly from its SID: -```python +```pycon >>> substance = pcp.Substance.from_sid(223766453) >>> print(substance.synonyms) ['2-(Acetyloxy)-benzoic acid', '2-(acetyloxy)benzoic acid', '2-acetoxy benzoic acid', '2-acetoxy-benzoic acid', '2-acetoxybenzoic acid', '2-acetyloxybenzoic acid', 'BSYNRYMUTXBXSQ-UHFFFAOYSA-N', 'acetoxybenzoic acid', 'acetyl salicylic acid', 'acetyl-salicylic acid', 'acetylsalicylic acid', 'aspirin', 'o-acetoxybenzoic acid'] diff --git a/docs/index.md b/docs/index.md index 3e26b80..fac1724 100644 --- a/docs/index.md +++ b/docs/index.md @@ -6,7 +6,7 @@ This package handles the complexity of the PubChem PUG REST API, providing a sim Here's a quick example showing how to get calculated properties for a specific compound: -```python +```pycon >>> import pubchempy as pcp >>> compound = pcp.Compound.from_cid(2244) # Aspirin >>> print(compound.molecular_formula) @@ -21,8 +21,8 @@ C9H8O4 Here's how to search for a compound by name: -```python ->>> for compound in pcp.get_compounds('glucose', 'name'): +```pycon +>>> for compound in pcp.get_compounds("glucose", "name"): ... print(compound.cid) ... print(compound.smiles) ... From 4e67da415311f1e6422ec1ae6361358c4d72e2de Mon Sep 17 00:00:00 2001 From: Matt Swain Date: Fri, 8 Aug 2025 13:43:56 -0400 Subject: [PATCH 5/5] Improve getting started docs --- docs/guide/gettingstarted.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/docs/guide/gettingstarted.md b/docs/guide/gettingstarted.md index 3a66971..fbcd26e 100644 --- a/docs/guide/gettingstarted.md +++ b/docs/guide/gettingstarted.md @@ -49,7 +49,7 @@ cs = get_compounds("Aspirin", "name") ## Searching -What if you don't know the PubChem CID of the Compound you want? Just use the {func}`~pubchempy.get_compounds` function: +What if you don't know the PubChem CID of the Compound you want? 
Just use the {func}`~pubchempy.get_compounds` function, for example with a compound name input: ```pycon >>> results = pcp.get_compounds("Glucose", "name") @@ -57,7 +57,9 @@ What if you don't know the PubChem CID of the Compound you want? Just use the {f [Compound(5793)] ``` -The first argument is the identifier, and the second argument is the identifier type, which must be one of `name`, `smiles`, `sdf`, `inchi`, `inchikey` or `formula`. It looks like there are 4 compounds in the PubChem Database that have the name Glucose associated with them. Let's take a look at them in more detail: +The first argument is the identifier, and the second argument is the identifier type, which must be one of `name`, `smiles`, `sdf`, `inchi`, `inchikey` or `formula`. More often than not, only a single result will be returned, but sometimes there are multiple results for a given identifier. Therefore, {func}`~pubchempy.get_compounds` returns a list of {class}`~pubchempy.Compound` objects (even if there is only one result). + +It is possible to iterate over this list to get the individual {class}`~pubchempy.Compound` objects: ```pycon >>> for compound in results: @@ -65,9 +67,15 @@ The first argument is the identifier, and the second argument is the identifier C([C@@H]1[C@H]([C@@H]([C@H](C(O1)O)O)O)O)O ``` -It looks like they all have different stereochemistry information. +Or you can access the first result directly: + +```pycon +>>> compound = results[0] +>>> print(compound.smiles) +C([C@@H]1[C@H]([C@@H]([C@H](C(O1)O)O)O)O)O +``` -Retrieving the record for a SMILES string is just as easy: +Retrieving the compound record(s) for a SMILES input is just as easy: ```pycon >>> pcp.get_compounds("C1=CC2=C(C3=C(C=CC=N3)C=C2)N=C1", "smiles")