"""This module implements a class to resolve the symbolic references in condensed
structure data.
"""
from __future__ import annotations
from collections import defaultdict, namedtuple
from typing import Any
import numpy as np
from pymatgen.core.periodic_table import get_el_sp
from robocrys.adapter import BaseAdapter
# Lightweight record types returned by ``DescriptionAdapter``. The order of the
# field names is part of the interface, because namedtuples can be constructed
# and unpacked positionally.

# Summary of a single component, as built by
# ``DescriptionAdapter.get_component_details``.
ComponentDetails = namedtuple(
    "ComponentDetails",
    "formula count dimensionality molecule_name orientation nsites index",
)

# Components sharing the same formula, dimensionality and molecule name, as
# built by ``DescriptionAdapter.get_component_groups``.
ComponentGroup = namedtuple(
    "ComponentGroup",
    "formula dimensionality count components molecule_name nsites",
)

# Sites of one component that share an element, as built by
# ``DescriptionAdapter.get_component_site_groups``.
SiteGroup = namedtuple("SiteGroup", "element count sites")

# Summary of nearest neighbor sites, as built by
# ``DescriptionAdapter.get_nearest_neighbor_details``.
NeighborSiteDetails = namedtuple(
    "NeighborSiteDetails", "element count sites sym_label"
)

# Summary of next nearest neighbor sites, as built by
# ``DescriptionAdapter.get_next_nearest_neighbor_details``.
NextNeighborSiteDetails = namedtuple(
    "NextNeighborSiteDetails",
    "element count geometry sites sym_label connectivity poly_formula",
)
class DescriptionAdapter(BaseAdapter):
    """Class to facilitate pulling data from the condensed structure dictionary.

    The methods read ``self.sites``, ``self.components``,
    ``self.component_makeup`` and ``self.elements``, which are presumably
    provided by :class:`BaseAdapter` from the condensed structure (not visible
    in this module -- confirm against the base class).

    Attributes:
        sym_labels: Maps each site index obtained by iterating ``self.sites``
            to its formatted symmetry label string, as built by
            :meth:`get_sym_label`.
        use_iupac_ordering: Whether sites are ordered by the IUPAC
            "electronegativity" series (``True``) or by electronegativity
            values (``False``).

    Args:
        condensed_structure: The condensed structure data, formatted as produced
            by :meth:`robocrys.condense.StructureCondenser.condense_structure`.
        use_iupac_ordering (bool, optional): Whether to order formulas
            by the iupac "electronegativity" series, defined in
            Table VI of "Nomenclature of Inorganic Chemistry (IUPAC
            Recommendations 2005)". This ordering effectively follows
            the groups and rows of the periodic table, except the
            Lanthanides, Actinides and hydrogen. If set to ``False``, the
            elements will be ordered according to the electronegativity values.
    """

    def __init__(
        self, condensed_structure: dict[str, Any], use_iupac_ordering: bool = True
    ):
        super().__init__(condensed_structure)
        self.use_iupac_ordering = use_iupac_ordering
        # Build every site's label once up front; the neighbor summaries look
        # labels up by site index when deciding how to group sites.
        self.sym_labels = {
            site_index: self.get_sym_label(site_index) for site_index in self.sites
        }

    def get_nearest_neighbor_details(
        self, site_index: int, group: bool = False
    ) -> list[NeighborSiteDetails]:
        """Gets a summary of all the nearest neighbors to a site.

        Args:
            site_index: An inequivalent site index.
            group: Whether to group all nearest neighbor sites
                with the same element together.

        Returns:
            A :obj:`list` of ``NeighborSiteDetails`` objects, each with the
            attributes:

            - ``element`` (``str``): The element of the nearest neighbor site.
            - ``count`` (``int``): The number of sites of this type.
            - ``sym_label`` (``str``): The symmetry label.
            - ``sites`` (``list[int]``): The site indices representing this
              nearest neighbor. Can be more than one site if
              ``group=True``.
        """
        nn_sites = self.sites[site_index]["nn"]

        # identity -> list of (neighbor site index, multiplicity) pairs. The
        # multiplicity is counted on the full neighbor list, which may contain
        # the same site index more than once.
        nn_dict = defaultdict(list)
        for nn_site in set(nn_sites):
            element = self.sites[nn_site]["element"]
            if group:
                identity = (element,)
            else:
                identity = (element, self.sym_labels[nn_site])
            nn_dict[identity].append((nn_site, nn_sites.count(nn_site)))

        nn_details = []
        for identity, nn_group in nn_dict.items():
            sites = [nn_site for nn_site, _ in nn_group]
            nn_details.append(
                NeighborSiteDetails(
                    element=identity[0],
                    sites=sites,
                    count=sum(count for _, count in nn_group),
                    # Recomputed from all grouped sites so the label covers the
                    # whole group rather than a single site.
                    sym_label=self.get_sym_label(sites),
                )
            )

        return sorted(nn_details, key=self._site_order)

    def get_next_nearest_neighbor_details(
        self, site_index: int, group: bool = False
    ) -> list[NextNeighborSiteDetails]:
        """Gets a summary of all the next nearest neighbors to a site.

        Only next nearest neighbor sites with a non-empty ``poly_formula`` are
        summarised; all other sites are skipped. (Presumably ``poly_formula``
        is only set for sites whose geometry type is listed in
        :attr:`robocrys.util.connected_geometries` -- confirm against the
        condenser.)

        Args:
            site_index: An inequivalent site index.
            group: Whether to group together all next nearest neighbor sites
                with the same element, connectivity and geometry but different
                symmetry labels.

        Returns:
            A :obj:`list` of ``NextNeighborSiteDetails`` objects, each with the
            attributes:

            - ``element`` (``str``): The element of the next nearest neighbor
              site.
            - ``connectivity`` (``str``): The connectivity type to this site.
            - ``geometry`` (``str``): The geometry type of the next nearest
              neighbor.
            - ``count`` (``int``): The number of sites of this type.
            - ``sym_label`` (``str``): The symmetry label.
            - ``sites`` (``list[int]``): The site indices representing this
              next nearest neighbor. Can be more than one site if
              ``group=True``.
            - ``poly_formula`` (``str``): The polyhedral formula. When several
              sites are grouped, this is the formula of the first site in the
              group.
        """
        nnn = self.sites[site_index]["nnn"]

        nnn_dict = defaultdict(list)
        for connectivity, connected_sites in nnn.items():
            for nnn_site in set(connected_sites):
                poly_formula = self.sites[nnn_site]["poly_formula"]
                if not poly_formula:
                    # only interested in describing the connectivity to other
                    # polyhedral sites of interest.
                    continue

                element = self.sites[nnn_site]["element"]
                geometry = self.sites[nnn_site]["geometry"]["type"]
                identity = (element, connectivity, geometry)
                if not group:
                    # Keep sites with different symmetry labels separate.
                    identity += (self.sym_labels[nnn_site],)

                nnn_dict[identity].append(
                    {
                        "count": connected_sites.count(nnn_site),
                        "site": nnn_site,
                        "poly_formula": poly_formula,
                    }
                )

        nnn_details = []
        for identity, nnn_group in nnn_dict.items():
            sites = [entry["site"] for entry in nnn_group]
            nnn_details.append(
                NextNeighborSiteDetails(
                    element=identity[0],
                    connectivity=identity[1],
                    geometry=identity[2],
                    sites=sites,
                    poly_formula=nnn_group[0]["poly_formula"],
                    count=sum(entry["count"] for entry in nnn_group),
                    sym_label=self.get_sym_label(sites),
                )
            )

        return sorted(nnn_details, key=self._site_order)

    def get_component_details(self) -> list[ComponentDetails]:
        """Gets a summary of all components.

        Returns:
            A :obj:`list` of ``ComponentDetails`` objects, each with the
            attributes:

            - ``count`` (``int``): The number of these components in the
              structure.
            - ``formula`` (``str``): The component formula.
            - ``dimensionality`` (``int``): The component dimensionality.
            - ``molecule_name`` (``str`` or ``None``): The molecule name if
              applicable, else ``None``.
            - ``orientation`` (``tuple[int]``): The component orientation.
            - ``nsites`` (``int``): The length of the component's ``"sites"``
              list.
            - ``index`` (``int``): The component inequivalent index.
        """
        component_details = []
        for index in set(self.component_makeup):
            component = self.components[index]
            component_details.append(
                ComponentDetails(
                    # component_makeup lists a component index once per
                    # occurrence, so counting it gives the multiplicity.
                    count=self.component_makeup.count(index),
                    formula=component["formula"],
                    dimensionality=component["dimensionality"],
                    molecule_name=component["molecule_name"],
                    orientation=component["orientation"],
                    nsites=len(component["sites"]),
                    index=index,
                )
            )

        return sorted(component_details, key=_component_order)

    def get_component_groups(self) -> list[ComponentGroup]:
        """Gets a summary of all components groups.

        Returns:
            The components, grouped together by formula, dimensionality and
            molecule name. The data will be returned as a :obj:`list` of
            ``ComponentGroup`` objects, each with the attributes:

            - ``count`` (``int``): The total number of components in this group.
            - ``formula`` (``str``): The formula of the components.
            - ``dimensionality`` (``int``): The dimensionality of the
              components.
            - ``molecule_name`` (``str`` or ``None``): The molecule name if
              applicable, else ``None``.
            - ``components`` (``list[ComponentDetails]``): The components
              in the group.
            - ``nsites`` (``int``): The ``nsites`` value of the first
              component in the group.
        """
        grouped_components = defaultdict(list)
        for component in self.get_component_details():
            identity = (
                component.dimensionality,
                component.formula,
                component.molecule_name,
            )
            grouped_components[identity].append(component)

        component_group_details = []
        for identity, group in grouped_components.items():
            dimensionality, formula, molecule_name = identity
            component_group_details.append(
                ComponentGroup(
                    count=sum(component.count for component in group),
                    dimensionality=dimensionality,
                    formula=formula,
                    molecule_name=molecule_name,
                    components=sorted(group, key=_component_order),
                    nsites=group[0].nsites,
                )
            )

        return sorted(component_group_details, key=_component_order)

    def get_component_site_groups(self, component_index: int) -> list[SiteGroup]:
        """Gets a summary of the sites in a component.

        Args:
            component_index: The index of the component in ``self.components``.

        Returns:
            The sites, grouped together by element. The data will be returned
            as a :obj:`list` of ``SiteGroup`` objects, each with the attributes:

            - ``count`` (``int``): The total number of sites in this group,
              counting every occurrence of a site index in the component's
              ``"sites"`` list.
            - ``element`` (``str``): The site element.
            - ``sites`` (``list[int]``): A list of the distinct site indices in
              this group.
        """
        # Keep the full list for counting: a site index may appear more than
        # once (presumably once per symmetry-equivalent site -- confirm against
        # the condenser). Counting on a de-duplicated list would always give 1.
        component_sites = self.components[component_index]["sites"]

        grouped_sites = defaultdict(list)
        for site_index in set(component_sites):
            grouped_sites[self.elements[site_index]].append(site_index)

        site_groups = []
        for element, group in grouped_sites.items():
            site_groups.append(
                SiteGroup(
                    count=sum(
                        component_sites.count(site_index) for site_index in group
                    ),
                    element=element,
                    sites=group,
                )
            )

        return sorted(site_groups, key=self._site_order)

    def get_sym_label(self, site_indices: int | list[int]) -> str:
        """Convert site indices into a formatted symmetry label.

        Args:
            site_indices: The site indices. A single index may be given as a
                Python ``int`` or any NumPy integer scalar.

        Returns:
            The formatted symmetry label. E.g., if the set of symmetry labels
            for the sites looks like ``(1, 2)``, the symmetry label will be
            ``(1,2)``.
        """
        # np.integer is the base class of every NumPy integer scalar type, so
        # np.int64 indices are treated the same way as np.int32 and int.
        if isinstance(site_indices, (int, np.integer)):
            # If only one site is provided turn it into a list
            site_indices = [site_indices]

        all_labels = sorted(
            label
            for site_index in site_indices
            for label in self.sites[site_index]["sym_labels"]
        )
        joined_labels = ",".join(map(str, all_labels))
        return f"({joined_labels})"

    def _site_order(
        self, s: SiteGroup | NeighborSiteDetails | NextNeighborSiteDetails
    ) -> list:
        """Utility function to help sort NeighborSiteDetails and SiteGroups.

        Args:
            s: The record to build a sort key for.

        Returns:
            A list usable as a ``sorted`` key. The element is ranked either by
            its ``iupac_ordering`` or by its electronegativity ``X``, depending
            on ``self.use_iupac_ordering``.
        """
        specie = get_el_sp(s.element)
        x = specie.iupac_ordering if self.use_iupac_ordering else specie.X

        if isinstance(s, NeighborSiteDetails):
            return [x, s.count, s.sym_label, s.sites]

        if isinstance(s, NextNeighborSiteDetails):
            # Connectivity and geometry take precedence over the element rank.
            return [
                s.connectivity,
                s.geometry,
                s.count,
                x,
                s.poly_formula,
                s.sym_label,
                s.sites,
            ]

        return [x, s.count, s.sites]
def _component_order(c: ComponentDetails | ComponentGroup):
    """Build the sort key used to order ComponentDetails and ComponentGroups.

    Args:
        c: The component record to build a key for.

    Returns:
        A list of the molecule name, dimensionality and formula, followed by
        the orientation (``ComponentDetails`` only) and finally the count.
    """
    # Records without a molecule name use the placeholder "z" in the name slot.
    sort_key = [c.molecule_name or "z", c.dimensionality, c.formula]

    if isinstance(c, ComponentDetails):
        # Only ComponentDetails has an orientation; a missing (falsy) one is
        # replaced by (0, 0, 0) so the key stays comparable.
        sort_key.append(c.orientation or (0, 0, 0))

    sort_key.append(c.count)
    return sort_key