Skip to content

cclib

Schemas for molecular DFT codes parsed by cclib.

LOGGER module-attribute

LOGGER = getLogger(__name__)

cclib_summarize_run

cclib_summarize_run(
    final_atoms: Atoms,
    logfile_extensions: str | list[str],
    directory: Path | str | None = None,
    pop_analyses: (
        list[
            Literal[
                "cpsa",
                "mpa",
                "lpa",
                "bickelhaupt",
                "density",
                "mbo",
                "bader",
                "ddec6",
                "hirshfeld",
            ]
        ]
        | None
    ) = None,
    check_convergence: bool = _DEFAULT_SETTING,
    additional_fields: dict[str, Any] | None = None,
    store: Store | None = _DEFAULT_SETTING,
) -> cclibSchema

Get tabulated results from a molecular DFT run and store them in a database-friendly format. This is meant to be a general parser built on top of cclib.

Parameters:

  • final_atoms (Atoms) –

    ASE Atoms object following a calculation.

  • logfile_extensions (str | list[str]) –

    Possible extensions of the log file (e.g. ".log", ".out", ".txt", ".chk"). Note that only a partial match is needed. For instance, .log will match .log.gz and .log.1.gz. If multiple files with this extension are found, the one with the most recent change time will be used. For an exact match only, put in the full file name.

  • directory (Path | str | None, default: None ) –

    The path to the folder containing the calculation outputs. A value of None specifies the calculator directory.

  • pop_analyses (list[Literal['cpsa', 'mpa', 'lpa', 'bickelhaupt', 'density', 'mbo', 'bader', 'ddec6', 'hirshfeld']] | None, default: None ) –

    The name(s) of any cclib post-processing analysis to run. Note that for bader, ddec6, and hirshfeld, a cube file (.cube, .cub) must reside in directory. Supports: "cpsa", "mpa", "lpa", "bickelhaupt", "density", "mbo", "bader", "ddec6", "hirshfeld".

  • check_convergence (bool, default: _DEFAULT_SETTING ) –

    Whether to throw an error if geometry optimization convergence is not reached. Defaults to True in settings.

  • additional_fields (dict[str, Any] | None, default: None ) –

    Additional fields to add to the task document.

  • store (Store | None, default: _DEFAULT_SETTING ) –

    Maggma Store object to store the results in. Defaults to SETTINGS.STORE

Returns:

  • cclibSchema

    Dictionary representation of the task document

Source code in quacc/schemas/cclib.py
def cclib_summarize_run(
    final_atoms: Atoms,
    logfile_extensions: str | list[str],
    directory: Path | str | None = None,
    pop_analyses: (
        list[
            Literal[
                "cpsa",
                "mpa",
                "lpa",
                "bickelhaupt",
                "density",
                "mbo",
                "bader",
                "ddec6",
                "hirshfeld",
            ]
        ]
        | None
    ) = None,
    check_convergence: bool = _DEFAULT_SETTING,
    additional_fields: dict[str, Any] | None = None,
    store: Store | None = _DEFAULT_SETTING,
) -> cclibSchema:
    """
    Build a task document for a molecular DFT run from its cclib-parsed log
    file, merge it with the ASE run summary, and pass the result on to
    `finalize_dict` (together with the requested store).

    Parameters
    ----------
    final_atoms
        ASE Atoms object following a calculation.
    logfile_extensions
        Possible extensions of the log file (e.g. ".log", ".out", ".txt",
        ".chk"). Note that only a partial match is needed. For instance, `.log`
        will match `.log.gz` and `.log.1.gz`. If multiple files with this
        extension are found, the one with the most recent change time will be
        used. For an exact match only, put in the full file name.
    directory
        The path to the folder containing the calculation outputs. If None
        (or otherwise falsy), `final_atoms.calc.directory` is used.
    pop_analyses
        The name(s) of any cclib post-processing analysis to run. Note that for
        bader, ddec6, and hirshfeld, a cube file (.cube, .cub) must reside in
        directory. Supports: "cpsa", "mpa", "lpa", "bickelhaupt", "density",
        "mbo", "bader", "ddec6", "hirshfeld".
    check_convergence
        Whether to throw an error if geometry optimization convergence is not
        reached. When left at its default, `SETTINGS.CHECK_CONVERGENCE` is
        used.
    additional_fields
        Additional fields to add to the task document. These are merged last,
        so they take precedence over same-named keys from the parsed results.
    store
        Maggma Store object to store the results in. When left at its default,
        `SETTINGS.STORE` is used.

    Returns
    -------
    cclibSchema
        Dictionary representation of the task document

    Raises
    ------
    RuntimeError
        If `check_convergence` is truthy and the parsed attributes report
        `optdone` as exactly False.
    """
    directory = Path(directory or final_atoms.calc.directory)

    # Swap the sentinel defaults for the values configured in SETTINGS.
    if check_convergence == _DEFAULT_SETTING:
        check_convergence = SETTINGS.CHECK_CONVERGENCE
    if store == _DEFAULT_SETTING:
        store = SETTINGS.STORE
    if not additional_fields:
        additional_fields = {}

    # Parse the log file into the base cclib task document.
    cclib_task_doc = _make_cclib_schema(
        directory, logfile_extensions, analysis=pop_analyses
    )
    attributes = cclib_task_doc["attributes"]
    metadata = attributes["metadata"]

    # Only an explicit `False` counts as "not converged"; a missing `optdone`
    # entry does not trigger the error.
    if check_convergence and attributes.get("optdone") is False:
        error_text = f"Optimization not complete. Refer to {directory}"
        raise RuntimeError(error_text)

    # Reconstruct the input structure. It is not necessarily identical to the
    # Atoms the relaxation started from, because the DFT package may have
    # re-oriented the system. The input is only taken from the metadata when
    # it is XYZ-formatted, since an Atoms object cannot represent internal
    # coordinates or a Gaussian Z-matrix; otherwise the first trajectory
    # frame is used instead.
    xyz_rows = metadata.get("coords") if metadata.get("coord_type") == "xyz" else None
    if xyz_rows is None:
        input_atoms = cclib_task_doc["trajectory"][0]
    else:
        # Each row holds the element symbol followed by its coordinates.
        input_atoms = Atoms(
            symbols=[entry[0] for entry in xyz_rows],
            positions=[entry[1:] for entry in xyz_rows],
        )

    # Every directory entry whose name starts with "step" is counted, and the
    # sub-folders step0 ... step{count-1} are then parsed individually.
    step_count = sum(1 for entry in os.listdir(directory) if entry.startswith("step"))
    intermediate_cclib_task_docs = {}
    if step_count:
        per_step_docs = {}
        for step_index in range(step_count):
            per_step_docs[step_index] = _make_cclib_schema(
                Path(directory, f"step{step_index}"), logfile_extensions
            )
        intermediate_cclib_task_docs["steps"] = per_step_docs

    # Base task document for the ASE run. No store is passed here; the store
    # is only handed to `finalize_dict` below.
    charge = attributes["charge"]
    multiplicity = attributes["mult"]
    run_task_doc = summarize_run(
        final_atoms,
        input_atoms,
        charge_and_multiplicity=(charge, multiplicity),
        store=None,
    )

    # Merge left to right: later dictionaries overwrite earlier ones on key
    # collisions.
    unsorted_task_doc = (
        run_task_doc | intermediate_cclib_task_docs | cclib_task_doc | additional_fields
    )
    return finalize_dict(
        unsorted_task_doc, directory, gzip_file=SETTINGS.GZIP_FILES, store=store
    )

summarize_cclib_opt_run

summarize_cclib_opt_run(
    optimizer: Optimizer,
    logfile_extensions: str | list[str],
    trajectory: Trajectory | list[Atoms] | None = None,
    directory: Path | str | None = None,
    pop_analyses: (
        list[
            Literal[
                "cpsa",
                "mpa",
                "lpa",
                "bickelhaupt",
                "density",
                "mbo",
                "bader",
                "ddec6",
                "hirshfeld",
            ]
        ]
        | None
    ) = None,
    check_convergence: bool = _DEFAULT_SETTING,
    additional_fields: dict[str, Any] | None = None,
    store: Store | None = _DEFAULT_SETTING,
) -> cclibASEOptSchema

Merges the results of a cclib run with the results of an ASE optimizer run.

Parameters:

  • optimizer (Optimizer) –

    The ASE optimizer object

  • logfile_extensions (str | list[str]) –

    Possible extensions of the log file (e.g. ".log", ".out", ".txt", ".chk"). Note that only a partial match is needed. For instance, .log will match .log.gz and .log.1.gz. If multiple files with this extension are found, the one with the most recent change time will be used. For an exact match only, put in the full file name.

  • trajectory (Trajectory | list[Atoms] | None, default: None ) –

    ASE Trajectory object or list[Atoms] from reading a trajectory file. If None, the trajectory must be available on the optimizer itself, as optimizer.traj_atoms.

  • directory (Path | str | None, default: None ) –

    The path to the folder containing the calculation outputs. A value of None specifies the calculator directory.

  • pop_analyses (list[Literal['cpsa', 'mpa', 'lpa', 'bickelhaupt', 'density', 'mbo', 'bader', 'ddec6', 'hirshfeld']] | None, default: None ) –

    The name(s) of any cclib post-processing analysis to run. Note that for bader, ddec6, and hirshfeld, a cube file (.cube, .cub) must reside in directory. Supports: "cpsa", "mpa", "lpa", "bickelhaupt", "density", "mbo", "bader", "ddec6", "hirshfeld".

  • check_convergence (bool, default: _DEFAULT_SETTING ) –

    Whether to throw an error if geometry optimization convergence is not reached. Defaults to True in settings.

  • additional_fields (dict[str, Any] | None, default: None ) –

    Additional fields to add to the task document.

  • store (Store | None, default: _DEFAULT_SETTING ) –

    Maggma Store object to store the results in. Defaults to SETTINGS.STORE

Returns:

  • cclibASEOptSchema

    Dictionary representation of the task document

Source code in quacc/schemas/cclib.py
def summarize_cclib_opt_run(
    optimizer: Optimizer,
    logfile_extensions: str | list[str],
    trajectory: Trajectory | list[Atoms] | None = None,
    directory: Path | str | None = None,
    pop_analyses: (
        list[
            Literal[
                "cpsa",
                "mpa",
                "lpa",
                "bickelhaupt",
                "density",
                "mbo",
                "bader",
                "ddec6",
                "hirshfeld",
            ]
        ]
        | None
    ) = None,
    check_convergence: bool = _DEFAULT_SETTING,
    additional_fields: dict[str, Any] | None = None,
    store: Store | None = _DEFAULT_SETTING,
) -> cclibASEOptSchema:
    """
    Combine the cclib summary of a calculation with the summary of the ASE
    optimizer run that drove it, producing a single task document.

    Parameters
    ----------
    optimizer
        The ASE optimizer object
    logfile_extensions
        Possible extensions of the log file (e.g. ".log", ".out", ".txt",
        ".chk"). Note that only a partial match is needed. For instance, `.log`
        will match `.log.gz` and `.log.1.gz`. If multiple files with this
        extension are found, the one with the most recent change time will be
        used. For an exact match only, put in the full file name.
    trajectory
        ASE Trajectory object or list[Atoms] from reading a trajectory file. If
        None, the trajectory must be available on the optimizer itself
        (`traj_atoms`).
    directory
        The path to the folder containing the calculation outputs. If None
        (or otherwise falsy), the calculator directory of the final Atoms
        object is used.
    pop_analyses
        The name(s) of any cclib post-processing analysis to run. Note that for
        bader, ddec6, and hirshfeld, a cube file (.cube, .cub) must reside in
        directory. Supports: "cpsa", "mpa", "lpa", "bickelhaupt", "density",
        "mbo", "bader", "ddec6", "hirshfeld".
    check_convergence
        Whether to throw an error if geometry optimization convergence is not
        reached. Forwarded unchanged to both underlying summarizers.
    additional_fields
        Additional fields to add to the task document. Forwarded unchanged to
        both underlying summarizers.
    store
        Maggma Store object to store the results in. When left at its default,
        `SETTINGS.STORE` is used.

    Returns
    -------
    cclibASEOptSchema
        Dictionary representation of the task document
    """
    # Swap the sentinel default for the store configured in SETTINGS.
    if store == _DEFAULT_SETTING:
        store = SETTINGS.STORE

    relaxed_atoms = get_final_atoms_from_dynamics(optimizer)
    directory = Path(directory or relaxed_atoms.calc.directory)

    # Both summarizers receive the same convergence/extra-field options and
    # are called with store=None; the store is only handed to `finalize_dict`
    # at the end, for the merged document.
    shared_options = {
        "check_convergence": check_convergence,
        "additional_fields": additional_fields,
        "store": None,
    }

    cclib_summary = cclib_summarize_run(
        relaxed_atoms,
        logfile_extensions,
        directory=directory,
        pop_analyses=pop_analyses,
        **shared_options,
    )

    # Charge and spin multiplicity are taken from the cclib summary and passed
    # explicitly to the optimizer summary.
    charge = cclib_summary["charge"]
    multiplicity = cclib_summary["spin_multiplicity"]
    opt_run_summary = summarize_opt_run(
        optimizer,
        trajectory=trajectory,
        charge_and_multiplicity=(charge, multiplicity),
        **shared_options,
    )

    merged_task_doc = recursive_dict_merge(cclib_summary, opt_run_summary)
    return finalize_dict(
        merged_task_doc, directory, gzip_file=SETTINGS.GZIP_FILES, store=store
    )