1a_BNE_workflow.py — Compute Bond-Network Entropy¶
Computes Bond-Network Entropy (BNE) for the disordered system across all LAE
sizes defined in config.LOCAL_ENVIRONMENT_NAT. For each size N it:
Reads the disordered structure from
config.DISORDERED_POSCAR.Computes pairwise MIC distances up to
config.N_SMALLESTneighbours.Extracts the H₁ barcode for every atom’s local environment of size N.
Collects the barcode distribution and computes the Shannon entropy (BNE).
Saves results to
<BNE_WORK_DIR>/<BNE_FOLDER>/structure_0/entropy_number_<N>.hdf5(keys:entropy,probabilities,number_of_atoms).
Prerequisites: BNE installation (bash setup_bne.sh).
cd workflows
python 1a_BNE_workflow.py
1"""
25_BNE_workflow.py — Compute Bond-Network Entropy for a range of LAE sizes
3==========================================================================
4
5This script calculates the Bond-Network Entropy (BNE) for a given structure and saves the result to HDF5 files.
6
7How to run
8----------
9From the repository root:
10
11 python workflows/5_BNE_workflow.py
12
13
14Output
15------
16One HDF5 file per LAE size, saved as:
17 data/bond_network_entropy/structure_0/entropy_number_<N>.hdf5
18
19Each file stores:
20 - "entropy" : scalar BNE value
21 - "probabilities" : array of barcode class probabilities
22 - "number_of_atoms" : LAE size N used for this calculation
23"""
24
25import os
26import sys
27import pathlib
28sys.path.insert(0, str(pathlib.Path(__file__).parent))
29
30import numpy as np
31import h5py
32from tqdm import tqdm
33
34# obtain_distances_big_structures is the fast manual MIC implementation suited
35# for large structures. It operates directly on NumPy arrays (positions and
36# lattice vectors) rather than an ASE Atoms object, which avoids per-atom
37# Python overhead inside the ASE call.
38#
39# WARNING — known limitations of this implementation:
40# 1. It is only *exact* for orthorhombic cells (a, b, c mutually perpendicular).
41# 2. For monoclinic or triclinic cells the single-image MIC wrap should only be accurate
42#. for distances shorter than the half the smallest cell dimension (d < min(|a|, |b|, |c|) / 2).
43# Distances beyond that threshold may be assigned to the wrong periodic image.
44# For the Si–O bond cutoff of 2.1 Å and an orthorhombic silica unit cell this is fine,
45# but always verify before applying to a different structure.
46#
47# For non-orthorhombic cells and large cutoffs, use obtain_distances_ase instead.
48from smooth_disorder.structural import obtain_distances_big_structures, obtain_positions_and_lattice_vectors
49
50
51from smooth_disorder.barcode import (
52 obtain_local_number_environment_big_structures,
53 obtain_H1_barcode,
54 reduce_barcode,
55 mu,
56)
57
58from config import (
59 BNE_WORK_DIR, BNE_FOLDER,
60 DISORDERED_POSCAR,
61 CUTOFF, N_SMALLEST, STRUCTURE_IDX, LOCAL_ENVIRONMENT_NAT,
62)
63
64
65# ---------------------------------------------------------------------------
66# Saving results
67# ---------------------------------------------------------------------------
68
69def save_bne_data_to_files(filename, probabilities, entropy, LE_nat, filepath):
70 """
71 Save BNE results for one LAE size to an HDF5 file.
72
73 Parameters
74 ----------
75 filename : str
76 Output file path without the .hdf5 extension.
77 probabilities : np.ndarray
78 Empirical probability of each barcode equivalence class.
79 entropy : float
80 Shannon entropy (BNE) computed from the probabilities.
81 LE_nat : int
82 Number of atoms in the local atomic environment (LAE size).
83 filepath : str
84 Path to the input structure file (stored for reference).
85 """
86 with h5py.File(f"{filename}.hdf5", "w") as w:
87 w.create_dataset("probabilities", data=probabilities, compression="gzip")
88 w.create_dataset("entropy", data=np.array([entropy]), compression="gzip")
89 w.create_dataset("number_of_atoms", data=np.array([LE_nat]), compression="gzip")
90
91
92# ---------------------------------------------------------------------------
93# Main calculation
94# ---------------------------------------------------------------------------
95
96def calculate_barcode(distances, idx_distances, adjacency_matrix, n_atoms, save_folder, structure_idx, filepath, local_environment_nat):
97 """
98 Compute BNE for a range of LAE sizes and save results to disk.
99
100 For each LAE size N:
101 1. Extract the N-atom local environment around every atom using BFS.
102 2. Compute the H1 barcode (ring fingerprint) for each local environment.
103 3. Count how many atoms share the same barcode.
104 4. Convert counts to probabilities and compute Shannon entropy = BNE.
105 5. Save results to an HDF5 file.
106
107 Parameters
108 ----------
109 distances : np.ndarray, shape (n_atoms, n_smallest)
110 Pre-computed sorted nearest-neighbour distances (Ångström).
111 idx_distances : np.ndarray, shape (n_atoms, n_smallest)
112 Global atom indices corresponding to those distances.
113 adjacency_matrix : np.ndarray, shape (n_atoms, n_smallest)
114 Binary matrix: entry [i, j] = 1 if atoms i and idx_distances[i, j] are bonded.
115 n_atoms : int
116 Total number of atoms in the structure.
117 save_folder : str
118 Root directory for output HDF5 files.
119 structure_idx : int
120 Index label for this structure (used in the output subdirectory name).
121 filepath : str
122 Path to the original structure file.
123 local_environment_nat : list of int
124 LAE sizes to compute BNE for. Defined in the ``__main__`` block.
125 """
126 # Create the output directory if it does not exist yet.
127 os.makedirs(f"{save_folder}/structure_{structure_idx}", exist_ok=True)
128
129 for LE_nat in local_environment_nat:
130 print(f"Current LAE size = {LE_nat}")
131
132 # Gs collects barcodes grouped by their shape (dimensions of the array).
133 # Two reduced barcodes with different shapes can never be equal, so grouping by
134 # shape first makes the equality checks much faster.
135 Gs = {}
136
137 for atom_index in tqdm(range(n_atoms), desc="Atoms"):
138
139 # Step 1 — extract the local atomic environment (LAE).
140 # Starting from atom_index, we grow outward by BFS until we have
141 # included LE_nat atoms. The result is a small sub-graph.
142 local_adjacency_matrix, layers, local_atom_index, global_index = \
143 obtain_local_number_environment_big_structures(
144 adjacency_matrix=adjacency_matrix,
145 atom_index=atom_index,
146 distances=distances,
147 idx_distances=idx_distances,
148 n_environment_atoms=LE_nat,
149 )
150
151 # Step 2 — compute the H1 barcode of the local sub-graph.
152 G, F = obtain_H1_barcode(
153 adjacency_matrix=local_adjacency_matrix,
154 layers=layers,
155 mu=mu,
156 )
157
158 # Step 3 — reduce the barcode to its canonical form so that two
159 # topologically identical environments always give the same array.
160 G = reduce_barcode(G)
161
162 # Accumulate barcodes, grouped by shape for efficient comparison.
163 key = G.shape
164 Gs.setdefault(key, []).append(G)
165
166 # Step 4 — count equivalence classes (atoms with identical barcodes).
167 # np.unique(..., axis=0) finds the distinct rows across all barcodes
168 # that share the same shape, and counts how many times each barcode appears.
169 class_counts = []
170 for key, value in Gs.items():
171 _, class_count = np.unique(value, return_counts=True, axis=0)
172 class_counts.extend(class_count)
173
174 class_counts = np.array(class_counts)
175
176 # Convert raw counts to probabilities (must sum to 1).
177 probabilities = class_counts / class_counts.sum()
178
179 # Step 5 — Shannon entropy: BNE = -sum_i p_i * log(p_i).
180 entropy = -(probabilities * np.log(probabilities)).sum()
181
182 # Save results for this LAE size.
183 BNE_save_filename = f"{save_folder}/structure_{structure_idx}/entropy_number_{LE_nat}"
184 save_bne_data_to_files(BNE_save_filename, probabilities, entropy, LE_nat, filepath)
185
186
187# ---------------------------------------------------------------------------
188# Entry point
189# ---------------------------------------------------------------------------
190
191if __name__ == "__main__":
192
193 save_folder = f"{BNE_WORK_DIR}/{BNE_FOLDER}"
194
195 # Input structure: silica glass with 5184 atoms.
196 # Downloaded from https://www.pnas.org/doi/abs/10.1073/pnas.2422763122
197 atomic_positions, lattice_vectors = obtain_positions_and_lattice_vectors(DISORDERED_POSCAR)
198 n_atoms = len(atomic_positions)
199
200 # Compute nearest-neighbour distances for every atom.
201 distances, idx_distances = obtain_distances_big_structures(atomic_positions, lattice_vectors, n_smallest=N_SMALLEST)
202
203 # Build the adjacency matrix:
204 adjacency_matrix = ((distances < CUTOFF) & (distances > 0.1)).astype(int)
205
206 calculate_barcode(distances, idx_distances, adjacency_matrix, n_atoms, save_folder, STRUCTURE_IDX, DISORDERED_POSCAR, LOCAL_ENVIRONMENT_NAT)