4a_prepare_inputs_for_dl_fitting.py — Prepare DL Fitting Inputs

Extracts phonon frequencies and diffusivities from data_save/IC_dataset_tensor.npz (produced by step 3b) and saves them to irg_t9_216_frequencies.hdf5 and irg_t9_216_diffusivity.hdf5. This file is the bridge between the Allen-Feldman diffusivity workflow and the Disorder Linewidth fitting workflow.

Run after 3b_tensor_conductivity_save_process.py.

cd tutorials/diffusivity
python 4a_prepare_inputs_for_dl_fitting.py
  1import string, re, struct, sys, math, os
  2
  3import numpy as np
  4import pandas as pd
  5
  6from ase.io import read, write
  7from typing import Tuple
  8
  9import h5py
 10import matplotlib.pyplot as plt
 11
 12
 13# save the frequencies
 14def save_frequency_data_to_files(filename, frequencies, weights):
 15    compression="gzip"
 16    with h5py.File(f"{filename}.hdf5", "w") as w:
 17        w.create_dataset("frequencies", data=frequencies, compression=compression)
 18        w.create_dataset("weights", data=weights, compression=compression)
 19
 20
 21data_filename = "./data_save/IC_dataset_tensor.npz"
 22raw = np.load(data_filename)
 23
 24save_filename = "./irg_t9_216_frequencies"
 25weights = raw['weights']
 26frequencies = raw['frequencies']
 27save_frequency_data_to_files(save_filename, frequencies, weights)
 28
 29
 30
 31# calculate the diffusivity as a function of frequency and save it
 32def diffusivity_w(frequencies: np.ndarray, weights: np.ndarray, gamma_min_plateau: float,
 33                 diffusivities: np.ndarray, structure_file) -> \
 34        Tuple[np.ndarray, np.ndarray]:
 35    """
 36    Computes diffusivity as function of frequency
 37
 38    """
 39
 40    width_frequency = gamma_min_plateau / 10
 41    min_frequency = sorted(frequencies.flatten())[3]
 42    max_frequency = np.amax(frequencies)
 43    sigma_gauss = np.sqrt(np.pi / 2.0) * gamma_min_plateau  # in cmm^-1
 44    num_q = weights.sum()
 45
 46    frequencies_plot = np.arange(min_frequency - sigma_gauss, max_frequency + sigma_gauss, width_frequency)
 47    counts_all = np.zeros((len(frequencies_plot),))
 48    diffusivities_all = np.zeros((len(frequencies_plot,)))
 49
 50    def gaussian(x, sigma=sigma_gauss):
 51        result = (1.0 / (np.sqrt(2.0 * np.pi) * sigma)) * np.exp(-0.5 * (x / sigma) ** 2)
 52        return np.where(np.abs(x) > 2.5 * sigma, 0, result)
 53
 54    
 55    # MAIN calculation
 56    for iq in range(len(weights)):
 57        weight = weights[iq]
 58        freq_modes = frequencies[iq]
 59        diffusivity = diffusivities[iq]
 60
 61        for idx_bin in range(len(frequencies_plot)):
 62            frequency = frequencies_plot[idx_bin]
 63
 64            bool_condition2 = np.abs(freq_modes - frequency) < 2.5 * sigma_gauss  # for frequency
 65
 66
 67            # shape: (num_modes close to frequency,)
 68            gauss_distr = gaussian(freq_modes[bool_condition2] - frequency)
 69
 70            counts_all[idx_bin] += (weight * gauss_distr).sum()
 71            diffusivities_all[idx_bin] += (diffusivity[bool_condition2] * weight * gauss_distr).sum()
 72
 73    cell = read(structure_file)
 74    volume_A3 = cell.get_volume()
 75    
 76    diffusivities_all = np.where(np.isclose(counts_all, 0.0), 0.0, diffusivities_all/counts_all)
 77
 78    return counts_all / (volume_A3 * num_q), diffusivities_all, frequencies_plot
 79
 80
 81def obtain_gamma_plateau(infile: str = "data_save/IC_dataset.npz") -> float:
 82    """
 83    Returns gamma_min_plateau used to obtain quantities in the dataset
 84    :param infile: Location of the dataset
 85    :return: gamma_plateau
 86    """
 87    raw = np.load(infile)
 88    gamma_plateau = float(np.copy(raw['gamma_min_plateau']))
 89    return gamma_plateau
 90
 91
 92
 93# read data
 94source = f"./data_save/IC_dataset_tensor.npz"
 95IC_dataset = np.load(source)
 96
 97temperatures = np.copy(IC_dataset['temperatures'])
 98weights = np.copy(IC_dataset['weights'])
 99freq = np.copy(IC_dataset['frequencies'])
100
101density = np.copy(IC_dataset['density'])
102
103diffusivity_Allen = np.copy(raw['diff_Allen'])
104diffusivity_Allen = np.diagonal(diffusivity_Allen[:, :, :, :], axis1=2, axis2=3).sum(axis=2) / 3
105
106atomic_file = f"./POSCAR"
107gamma_plateau = obtain_gamma_plateau(source)
108
109# diffusivity as a function of frequency
110_, diffusivity_plot, frequencies_plot = diffusivity_w(freq, weights, gamma_plateau, diffusivity_Allen, atomic_file)
111
112
113
114# save data
115save_dict = {'frequencies': frequencies_plot, 'diffusivities': diffusivity_plot, 'gamma_plateau': gamma_plateau}
116
117cmd = f"mkdir -p ./data_save/diffusivity_w"
118os.system(cmd)
119
120outfile = f'./data_save/diffusivity_w/diffusivity_Allen_eta_conv.npz'
121np.savez(outfile, **save_dict)
122
123
124raw = np.load('./data_save/diffusivity_w/diffusivity_Allen_eta_conv.npz')
125frequencies_diffusivity = raw['frequencies']
126diffusivities = raw['diffusivities']
127
128compression="gzip"
129with h5py.File(f"irg_t9_216_diffusivity.hdf5", "w") as w:
130    w.create_dataset("diffusivity", data=diffusivities, compression=compression)
131    w.create_dataset("frequencies_plot", data=frequencies_diffusivity, compression=compression)
132
133
134plt.figure(figsize=(16, 8))
135
136plt.plot(frequencies_diffusivity, diffusivities)
137plt.xlabel('Frequencies [cm^{-1}]')
138plt.ylabel(r'Diffusivity [m^2 / s]')
139
140plt.savefig("./diffusivity_plot.png", dpi=300)