"""Miscellaneous utility functions and common data.
Attributes:
common_formulas: A set of common formulas. The keys to the data are strings
from :obj:`pymatgen.core.composition.Composition.reduced_formula`.
connected_geometries: A list of geometries that are considered
"connectable" polyhedra, i.e. those whose face-sharing, edge-sharing, etc.
properties are of interest.
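geometry_to_polyhedra: A mapping from geometry type (e.g. octahedral) to the
polyhedra name (e.g. octahedra, trigonal pyramid).
polyhedra_plurals: A mapping from polyhedra name (e.g. trigonal pyramid) to
its plural form (e.g. trigonal pyramids).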
dimensionality_to_shape: A mapping from dimensionality to the component
shape.
"""
from __future__ import annotations
import re
from collections import defaultdict
from typing import Any
from monty.json import MontyDecoder
from monty.serialization import loadfn
from importlib.resources import files as import_resource_file
from pymatgen.core.periodic_table import Element, Species, get_el_sp
from pymatgen.util.string import latexify_spacegroup
# Lookup table of common formulas, read once at import time from the gzipped
# JSON resource "formula_db.json.gz" shipped inside the ``robocrys.condense``
# package. Per the module docstring the keys are reduced formula strings; the
# values are annotated as strings (presumably names -- confirm against the
# data file).
common_formulas: dict[str, str] = loadfn(
import_resource_file("robocrys.condense") / "formula_db.json.gz"
)
# Geometry names treated as "connectable" polyhedra (see the module
# docstring). Every entry here is also a key of ``geometry_to_polyhedra``.
connected_geometries: list[str] = [
"tetrahedral",
"octahedral",
"trigonal pyramidal",
"square pyramidal",
"trigonal bipyramidal",
"pentagonal pyramidal",
"hexagonal pyramidal",
"pentagonal bipyramidal",
"hexagonal bipyramidal",
"cuboctahedral",
]
# Maps a geometry adjective (e.g. "octahedral") to the noun used for the
# corresponding polyhedron (e.g. "octahedra", "trigonal pyramid"). The values
# are exactly the keys of ``polyhedra_plurals``.
geometry_to_polyhedra: dict[str, str] = {
"octahedral": "octahedra",
"tetrahedral": "tetrahedra",
"trigonal pyramidal": "trigonal pyramid",
"square pyramidal": "square pyramid",
"trigonal bipyramidal": "trigonal bipyramid",
"pentagonal pyramidal": "pentagonal pyramid",
"hexagonal pyramidal": "hexagonal pyramid",
"pentagonal bipyramidal": "pentagonal bipyramid",
"hexagonal bipyramidal": "hexagonal bipyramid",
"cuboctahedral": "cuboctahedra",
}
# Maps a polyhedron noun (a value of ``geometry_to_polyhedra``) to its plural
# form. The "-hedra" names map to themselves; the pyramid names gain an "s".
polyhedra_plurals: dict[str, str] = {
"octahedra": "octahedra",
"tetrahedra": "tetrahedra",
"trigonal pyramid": "trigonal pyramids",
"square pyramid": "square pyramids",
"trigonal bipyramid": "trigonal bipyramids",
"pentagonal pyramid": "pentagonal pyramids",
"hexagonal pyramid": "hexagonal pyramids",
"pentagonal bipyramid": "pentagonal bipyramids",
"hexagonal bipyramid": "hexagonal bipyramids",
"cuboctahedra": "cuboctahedra",
}
# Maps an integer dimensionality (0-3) to the word used for the shape of a
# component with that dimensionality.
dimensionality_to_shape: dict[int, str] = {
3: "framework",
2: "sheet",
1: "ribbon",
0: "cluster",
}
def get_el(obj: Element | Species | str | int) -> str:
    """Get an element symbol from a string, Element, Species, or atomic number.

    Args:
        obj: The object to convert. Supported objects are Element/Species
            objects, integers (representing atomic numbers), or strings
            (element symbols or species strings).

    Returns:
        The element as a string.

    Raises:
        ValueError: If ``obj`` is not one of the supported types.
    """
    # Strings are first parsed by pymatgen; the result is then handled by the
    # type checks below like any other input.
    if isinstance(obj, str):
        obj = get_el_sp(obj)

    if isinstance(obj, Element):
        return obj.name

    if isinstance(obj, Species):
        # Drop the oxidation state and keep only the underlying element.
        return obj.element.name

    if isinstance(obj, int):
        return Element.from_Z(obj).name

    raise ValueError(f"Unsupported element type: {type(obj)}.")
# Translation table from the ASCII characters 0-9, "-" and "+" to their
# unicode superscript equivalents. Built once rather than on every call.
_SUPERSCRIPT_TABLE = str.maketrans("0123456789-+", "⁰¹²³⁴⁵⁶⁷⁸⁹⁻⁺")


def superscript_number(string: str) -> str:
    """Convert the digits and +/- signs in a string to unicode superscripts.

    Only the characters 0-9, ``+`` and ``-`` are converted; every other
    character is left untouched.

    Args:
        string: A string containing the numbers 0-9 or +/- characters.

    Returns:
        The superscript string. If ``string`` contains a period it is returned
        unchanged, because there is no unicode superscript period.
    """
    if "." in string:
        # no unicode period exists
        return string

    return string.translate(_SUPERSCRIPT_TABLE)
def unicodeify_spacegroup(spacegroup_symbol: str) -> str:
    """Format a spacegroup symbol using unicode characters.

    Screw-axis subscripts become unicode subscript digits and rotoinversion
    bars become a combining overline placed on the digit they belong to.

    E.g. Fd-3m -> Fd3̅m (the "3" followed by U+0305 COMBINING OVERLINE).

    Args:
        spacegroup_symbol: A spacegroup symbol.

    Returns:
        The unicode formatted spacegroup symbol.
    """
    subscript_digits = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉")
    overline = "\u0305"  # u"\u0304" (macron) is also an option

    symbol = latexify_spacegroup(spacegroup_symbol)

    # "$_{1}$" -> "₁" (single ASCII digits only, as before).
    symbol = re.sub(
        r"\$_\{([0-9])\}\$",
        lambda match: match.group(1).translate(subscript_digits),
        symbol,
    )

    # "$\overline{3}$" -> "3" + overline. A unicode combining mark applies to
    # the character *before* it, so it has to come after the digit; putting it
    # first draws the bar over the preceding letter instead.
    symbol = re.sub(
        r"\$\\overline\{([^{}$]*)\}\$",
        lambda match: match.group(1) + overline,
        symbol,
    )

    # Strip any LaTeX delimiters that are left over.
    for latex_char in "${}":
        symbol = symbol.replace(latex_char, "")

    return symbol
def htmlify_spacegroup(spacegroup_symbol: str) -> str:
    """Format a spacegroup symbol using html tags and a unicode overline.

    Screw-axis subscripts (``_1``) are wrapped in ``<sub>`` tags and
    rotoinversion bars (``-4``) become a combining overline on the digit.

    E.g. P-42_1m -> P4̅2<sub>1</sub>m (the "4" followed by U+0305).

    Args:
        spacegroup_symbol: A spacegroup symbol.

    Returns:
        The html formatted spacegroup symbol.
    """
    overline = "\u0305"  # u"\u0304" (macron) is also an option

    symbol = re.sub(r"_(\d+)", r"<sub>\1</sub>", spacegroup_symbol)

    # A unicode combining mark applies to the character *before* it, so the
    # overline must follow the digit; emitting it first would draw the bar
    # over the preceding letter.
    return re.sub(r"-(\d)", rf"\1{overline}", symbol)
def defaultdict_to_dict(dictionary: defaultdict) -> dict:
    """Recursively convert nested :obj:`defaultdict` to :obj:`dict`.

    Only :obj:`defaultdict` instances are converted and descended into; any
    other object (including a plain :obj:`dict`) is returned as-is.

    Args:
        dictionary: A defaultdict.

    Returns:
        The defaultdict as a :obj:`dict`.
    """
    if not isinstance(dictionary, defaultdict):
        # Leaf value, or a container we deliberately do not descend into.
        return dictionary

    converted = {}
    for key, value in dictionary.items():
        converted[key] = defaultdict_to_dict(value)
    return converted
def load_condensed_structure_json(filename: str) -> dict[str, Any]:
    """Load condensed structure data from a file.

    The file is read with :obj:`monty.serialization.loadfn` using
    :obj:`MontyDecoder`, after which dictionary keys that are strings of
    decimal digits are converted back to integers.

    Args:
        filename: The filename.

    Returns:
        The condensed structure data.
    """

    # JSON does not support using integers as dictionary keys, therefore
    # manually convert dictionary keys from str to int if possible.
    def json_keys_to_int(x: Any) -> Any:
        if isinstance(x, dict):
            # ``isdecimal`` (unlike ``isdigit``) is only true for strings that
            # ``int`` can parse; e.g. "²".isdigit() is True but int("²")
            # raises ValueError.
            return {
                int(k) if isinstance(k, str) and k.isdecimal() else k: json_keys_to_int(v)
                for k, v in x.items()
            }
        if isinstance(x, list):
            # Dictionaries nested inside lists need their keys converted too.
            return [json_keys_to_int(item) for item in x]
        return x

    # For some reason, specifying `object_hook = json_keys_to_int` in `loadfn`
    # doesn't seem to work. This does reliably:
    return json_keys_to_int(loadfn(filename, cls=MontyDecoder))