1a_BNE_workflow.py — Compute Bond-Network Entropy

Computes Bond-Network Entropy (BNE) for the disordered system across all LAE sizes defined in config.LOCAL_ENVIRONMENT_NAT. For each size N it:

  1. Reads the disordered structure from config.DISORDERED_POSCAR.

  2. Computes pairwise MIC distances up to config.N_SMALLEST neighbours.

  3. Extracts the H₁ barcode for every atom’s local environment of size N.

  4. Collects the barcode distribution and computes the Shannon entropy (BNE).

  5. Saves results to <BNE_WORK_DIR>/<BNE_FOLDER>/structure_0/entropy_number_<N>.hdf5 (keys: entropy, probabilities, number_of_atoms).

Prerequisites: BNE installation (bash setup_bne.sh).

cd workflows
python 1a_BNE_workflow.py
  1"""
  25_BNE_workflow.py — Compute Bond-Network Entropy for a range of LAE sizes
  3==========================================================================
  4
  5This script calculates the Bond-Network Entropy (BNE) for a given structure and saves the result to HDF5 files.  
  6
  7How to run
  8----------
  9From the repository root:
 10
 11    python workflows/5_BNE_workflow.py
 12
 13
 14Output
 15------
 16One HDF5 file per LAE size, saved as:
 17    data/bond_network_entropy/structure_0/entropy_number_<N>.hdf5
 18
 19Each file stores:
 20    - "entropy"         : scalar BNE value
 21    - "probabilities"   : array of barcode class probabilities
 22    - "number_of_atoms" : LAE size N used for this calculation
 23"""
 24
 25import os
 26import sys
 27import pathlib
 28sys.path.insert(0, str(pathlib.Path(__file__).parent))
 29
 30import numpy as np
 31import h5py
 32from tqdm import tqdm
 33
 34# obtain_distances_big_structures is the fast manual MIC implementation suited
 35# for large structures.  It operates directly on NumPy arrays (positions and
 36# lattice vectors) rather than an ASE Atoms object, which avoids per-atom
 37# Python overhead inside the ASE call.
 38#
 39# WARNING — known limitations of this implementation:
 40#   1. It is only *exact* for orthorhombic cells (a, b, c mutually perpendicular).
 41#   2. For monoclinic or triclinic cells the single-image MIC wrap should only be accurate
 42#.     for distances shorter than the half the smallest cell dimension (d < min(|a|, |b|, |c|) / 2).
 43#      Distances beyond that threshold may be assigned to the wrong periodic image.
 44#      For the Si–O bond cutoff of 2.1 Å and an orthorhombic silica unit cell this is fine,
 45#      but always verify before applying to a different structure.
 46#
 47# For non-orthorhombic cells and large cutoffs, use obtain_distances_ase instead.
 48from smooth_disorder.structural import obtain_distances_big_structures, obtain_positions_and_lattice_vectors
 49
 50
 51from smooth_disorder.barcode import (
 52    obtain_local_number_environment_big_structures,
 53    obtain_H1_barcode,
 54    reduce_barcode,
 55    mu,
 56)
 57
 58from config import (
 59    BNE_WORK_DIR, BNE_FOLDER,
 60    DISORDERED_POSCAR,
 61    CUTOFF, N_SMALLEST, STRUCTURE_IDX, LOCAL_ENVIRONMENT_NAT,
 62)
 63
 64
 65# ---------------------------------------------------------------------------
 66# Saving results
 67# ---------------------------------------------------------------------------
 68
 69def save_bne_data_to_files(filename, probabilities, entropy, LE_nat, filepath):
 70    """
 71    Save BNE results for one LAE size to an HDF5 file.
 72
 73    Parameters
 74    ----------
 75    filename : str
 76        Output file path without the .hdf5 extension.
 77    probabilities : np.ndarray
 78        Empirical probability of each barcode equivalence class.
 79    entropy : float
 80        Shannon entropy (BNE) computed from the probabilities.
 81    LE_nat : int
 82        Number of atoms in the local atomic environment (LAE size).
 83    filepath : str
 84        Path to the input structure file (stored for reference).
 85    """
 86    with h5py.File(f"{filename}.hdf5", "w") as w:
 87        w.create_dataset("probabilities", data=probabilities, compression="gzip")
 88        w.create_dataset("entropy", data=np.array([entropy]), compression="gzip")
 89        w.create_dataset("number_of_atoms", data=np.array([LE_nat]), compression="gzip")
 90
 91
 92# ---------------------------------------------------------------------------
 93# Main calculation
 94# ---------------------------------------------------------------------------
 95
 96def calculate_barcode(distances, idx_distances, adjacency_matrix, n_atoms, save_folder, structure_idx, filepath, local_environment_nat):
 97    """
 98    Compute BNE for a range of LAE sizes and save results to disk.
 99
100    For each LAE size N:
101      1. Extract the N-atom local environment around every atom using BFS.
102      2. Compute the H1 barcode (ring fingerprint) for each local environment.
103      3. Count how many atoms share the same barcode.
104      4. Convert counts to probabilities and compute Shannon entropy = BNE.
105      5. Save results to an HDF5 file.
106
107    Parameters
108    ----------
109    distances : np.ndarray, shape (n_atoms, n_smallest)
110        Pre-computed sorted nearest-neighbour distances (Ångström).
111    idx_distances : np.ndarray, shape (n_atoms, n_smallest)
112        Global atom indices corresponding to those distances.
113    adjacency_matrix : np.ndarray, shape (n_atoms, n_smallest)
114        Binary matrix: entry [i, j] = 1 if atoms i and idx_distances[i, j] are bonded.
115    n_atoms : int
116        Total number of atoms in the structure.
117    save_folder : str
118        Root directory for output HDF5 files.
119    structure_idx : int
120        Index label for this structure (used in the output subdirectory name).
121    filepath : str
122        Path to the original structure file.
123    local_environment_nat : list of int
124        LAE sizes to compute BNE for.  Defined in the ``__main__`` block.
125    """
126    # Create the output directory if it does not exist yet.
127    os.makedirs(f"{save_folder}/structure_{structure_idx}", exist_ok=True)
128
129    for LE_nat in local_environment_nat:
130        print(f"Current LAE size = {LE_nat}")
131
132        # Gs collects barcodes grouped by their shape (dimensions of the array).
133        # Two reduced barcodes with different shapes can never be equal, so grouping by
134        # shape first makes the equality checks much faster.
135        Gs = {}
136
137        for atom_index in tqdm(range(n_atoms), desc="Atoms"):
138
139            # Step 1 — extract the local atomic environment (LAE).
140            # Starting from atom_index, we grow outward by BFS until we have
141            # included LE_nat atoms.  The result is a small sub-graph.
142            local_adjacency_matrix, layers, local_atom_index, global_index = \
143                obtain_local_number_environment_big_structures(
144                    adjacency_matrix=adjacency_matrix,
145                    atom_index=atom_index,
146                    distances=distances,
147                    idx_distances=idx_distances,
148                    n_environment_atoms=LE_nat,
149                )
150
151            # Step 2 — compute the H1 barcode of the local sub-graph.
152            G, F = obtain_H1_barcode(
153                adjacency_matrix=local_adjacency_matrix,
154                layers=layers,
155                mu=mu,
156            )
157
158            # Step 3 — reduce the barcode to its canonical form so that two
159            # topologically identical environments always give the same array.
160            G = reduce_barcode(G)
161
162            # Accumulate barcodes, grouped by shape for efficient comparison.
163            key = G.shape
164            Gs.setdefault(key, []).append(G)
165
166        # Step 4 — count equivalence classes (atoms with identical barcodes).
167        # np.unique(..., axis=0) finds the distinct rows across all barcodes
168        # that share the same shape, and counts how many times each barcode appears.
169        class_counts = []
170        for key, value in Gs.items():
171            _, class_count = np.unique(value, return_counts=True, axis=0)
172            class_counts.extend(class_count)
173
174        class_counts = np.array(class_counts)
175
176        # Convert raw counts to probabilities (must sum to 1).
177        probabilities = class_counts / class_counts.sum()
178
179        # Step 5 — Shannon entropy: BNE = -sum_i p_i * log(p_i).
180        entropy = -(probabilities * np.log(probabilities)).sum()
181
182        # Save results for this LAE size.
183        BNE_save_filename = f"{save_folder}/structure_{structure_idx}/entropy_number_{LE_nat}"
184        save_bne_data_to_files(BNE_save_filename, probabilities, entropy, LE_nat, filepath)
185
186
187# ---------------------------------------------------------------------------
188# Entry point
189# ---------------------------------------------------------------------------
190
191if __name__ == "__main__":
192
193    save_folder = f"{BNE_WORK_DIR}/{BNE_FOLDER}"
194
195    # Input structure: silica glass with 5184 atoms.
196    # Downloaded from https://www.pnas.org/doi/abs/10.1073/pnas.2422763122
197    atomic_positions, lattice_vectors = obtain_positions_and_lattice_vectors(DISORDERED_POSCAR)
198    n_atoms = len(atomic_positions)
199
200    # Compute nearest-neighbour distances for every atom.
201    distances, idx_distances = obtain_distances_big_structures(atomic_positions, lattice_vectors, n_smallest=N_SMALLEST)
202
203    # Build the adjacency matrix:
204    adjacency_matrix = ((distances < CUTOFF) & (distances > 0.1)).astype(int)
205
206    calculate_barcode(distances, idx_distances, adjacency_matrix, n_atoms, save_folder, STRUCTURE_IDX, DISORDERED_POSCAR, LOCAL_ENVIRONMENT_NAT)