Advertisement
shihab2196

bondfile_parser_module_12-10-2023

Dec 10th, 2023
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 83.13 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Thu Feb 23 23:56:22 2023 (Version 1.0)
  4. Version 2.0 released on June 1, 2023
  5. last modified: 12/10/2023
  6.  
  7. @author: shihab
  8.  
  9. This is my own LAMMPS post-processing library
  10. """
  11.  
  12. import networkx as nx
  13. from collections import Counter
  14. import re
  15. import sys
  16. import math
  17. import time
  18. import pandas as pd
  19. import matplotlib.pyplot as plt
  20. import seaborn as sns
  21. from matplotlib.colors import LogNorm
  22. import random
  23. import os
  24. import pickle
  25. import json
  26. from functools import wraps
  27. import numpy as np
  28. from scipy.optimize import curve_fit, newton
  29. from rdkit import Chem
  30. from sklearn.cluster import KMeans
  31.  
  32. # =============================================================================
  33. ## Dacorator functions:
  34.  #   1. function_runtime
  35.  #   2. loadpickle_or_execute
  36.  #   3. loadjson_or_execute
  37. ## Module functions:
  38.  #   4. get_neighbours
  39.  #   5. parsebondfile
  40.  #   6. parsebondfile_asGraph
  41.  #   7. merge_bondfiles
  42.  #   8. merge_bondfiles_OLD
  43.  #   9. text_isequal
  44.  #   10. get_molecules
  45.  #   11. get_molecular_formula
  46.  #   12. get_SpeciesCountAtEveryTimestep
  47.  #   13. stepwise_species_count
  48.  #   14. stepwise_species_count_v2
  49.  #   15. expression_selector
  50.  #   16. pathway_tracker
  51.  #   17. step2picosecond
  52.  #   18. get_nearestindex
  53.  #   19. sort_molecular_formula
  54.  #   20. make_molecular_formula_latex
  55.  #   21. compute_molecular_weight
  56.  #   22. plot_species_heatmap
  57.  #   23. plot_species_heatmap_v2
  58.  #   24. atomic_weight
  59.  #   25. get_speciesVStemp
  60.  #   26. get_onset
  61.  #   27. bondorder_evolution
  62.  #   28. get_nbondsVStime
  63.  #   29. species_to_molecule
  64.  #   30. cumulative_nspecies
  65.  #   31. onset_plot
  66.  #   32. get_species_count
  67.  #   33. count_functinalGroup
  68.  #   34. atomConnectivity2smiles_OLD
  69.  #   35. moleculeGraph2smiles
  70.  #   36. get_bondtypes
  71.  #   37. draw_molecule_asGraph
  72. # =============================================================================
  73. def function_runtime(f):
  74.     @wraps(f)
  75.     def wrapper(*args,**kwargs):
  76.         start_time = time.time()
  77.         result = f(*args,**kwargs)
  78.         stop_time = time.time()
  79.         runtime = stop_time-start_time
  80.         minute = int(runtime/60)
  81.         second = runtime%60
  82.         if minute==0:
  83.             msg = "Execution time of {}: {:0.1f} sec".format(f.__name__,second)
  84.         else:
  85.             msg = "Execution time of {}: {} min {:0.1f} sec".format(f.__name__,minute,second)
  86.         print(msg)
  87.         return result
  88.     return wrapper
  89.  
  90. def loadpickle_or_execute(pickle_path,function, *args, **kwargs):
  91.     if os.path.exists(pickle_path):
  92.         # Load data from pickle
  93.         start = time.time()
  94.         print('Loading data from pickle instead of {}...'.format(function.__name__))
  95.         with open(pickle_path, 'rb') as pf:
  96.             data = pickle.load(pf)
  97.             stop = time.time()
  98.             _min = int((stop-start)/60)
  99.             _sec = (stop-start)%60
  100.             if _min==0:
  101.                 print('Loaded Succesfully!! Loading time: {:0.1f} sec'.format(_sec))
  102.             else:
  103.                 print('Loaded Succesfully!! Loading time: {} min {:0.1f} sec'.format(_min,_sec))
  104.             print('-'*60)
  105.     else:
  106.         # Execute the provided function to get the data
  107.         print('Loading data from {}...'.format(function.__name__))
  108.         data = function(*args,**kwargs)
  109.         print('Loaded Succesfully!!!')
  110.         print('-'*60)
  111.         # Save data to pickle for future use
  112.         with open(pickle_path, 'wb') as pf:
  113.             pickle.dump(data, pf)
  114.     return data
  115.  
  116. def loadjson_or_execute(pickle_path,function, *args, **kwargs):
  117.     if os.path.exists(pickle_path):
  118.         # Load data from pickle
  119.         print('Loading data from JSON instead of {}...'.format(function.__name__))
  120.         print('Loaded Succesfully!!!')
  121.         print('-'*60)
  122.         with open(pickle_path, 'r') as pf:
  123.             data = json.load(pf)
  124.     else:
  125.         # Execute the provided function to get the data
  126.         print('Loading data from {}...'.format(function.__name__))
  127.         data = function(*args,**kwargs)
  128.         print('Loaded Succesfully!!!')
  129.         print('-'*60)
  130.         # Save data to pickle for future use
  131.         with open(pickle_path, 'w') as pf:
  132.             json.dump(data, pf)
  133.     return data
  134.  
  135. # ---------------------List of Neighbours-----------------------------------
  136. @function_runtime
  137. def get_neighbours(bondfile,**kwargs):
  138.     #10 times faster than version 1
  139.    
  140.     #-------keyward arguments-------------
  141.     bo     = kwargs.get('bo',False) # to get bondorders
  142.     mtypes = kwargs.get('mtypes',False) # to get molecule ids
  143.     charge = kwargs.get('charge',False) # get charge of atoms
  144.     nlp    = kwargs.get('nlp',False) # get the number of lone pair
  145.     #-------------------------------------
  146.    
  147.     neighbours = {}
  148.     atomtypes  = {}
  149.     if bo: bondorders    = {}
  150.     if mtypes: molecule_types    = {}
  151.     if charge: charge = {}
  152.     if nlp: nlp = {}
  153.    
  154.     with open(bondfile) as bf:
  155.         prev_natoms = 0
  156.         natoms_flag = False
  157.         warning_flag = False
  158.         first_warning_ignore = True
  159.         atom_counter = 0
  160.        
  161.         for line in bf:
  162.             splitted = line.split()
  163.             if line.find('Timestep')!=-1:
  164.                 step = int(splitted[-1])
  165.                 neighbours[step]={}
  166.                 atomtypes = {}
  167.                 if bo: bondorders[step]  = {}
  168.                 if charge : charge[step] = {}
  169.                 if nlp: nlp[step] = {}
  170.                
  171.             if line.find('Number of particles')!=-1:
  172.                 current_natoms = int(splitted[-1])
  173.                
  174.                 if atom_counter!=current_natoms:
  175.                     if first_warning_ignore:
  176.                         first_warning_ignore=False
  177.                     else:
  178.                         warning_flag=True
  179.                    
  180.                 if natoms_flag and current_natoms!=prev_natoms:
  181.                     print('User warning from get_neighbours function: Lost atom warning!!')
  182.                 atom_counter = 0  
  183.                 prev_natoms  = current_natoms #new change
  184.             if splitted != [] and splitted[0].isnumeric():
  185.                 atom_counter            +=1
  186.                 parent                   = int(splitted[0])
  187.                 parent_type              = int(splitted[1])
  188.                 number_of_children       = int(splitted[2])
  189.                 children                 = list(map(int,splitted[3:3+number_of_children]))
  190.                 neighbours[step][parent] = children
  191.                 atomtypes[parent]        = parent_type
  192.                
  193.                 if mtypes:
  194.                     molecule_types[parent] = int(splitted[3+number_of_children])
  195.                 if bo:
  196.                     bo_id = 4+number_of_children
  197.                     bo_values = list(map(float,splitted[bo_id:bo_id+number_of_children]))
  198.                     bondorders[step][parent]={}
  199.                     for i,child in enumerate(children):
  200.                         bondorders[step][parent][child] = bo_values[i]
  201.                 if charge:
  202.                     charge[step][parent] = float(splitted[-1])
  203.                 if nlp:
  204.                     nlp[step][parent] = float(splitted[-2])
  205.        
  206.         if warning_flag:
  207.             print('User warning from get_neighbours function: Repeated atom information detected in some timesteps!')
  208.            
  209.         result = (neighbours,atomtypes,)
  210.         if bo: result+=(bondorders,)
  211.         if mtypes: result+=(molecule_types,)
  212.        
  213.         return result
  214.  
  215. # ---------------------List of Neighbours-----------------------------------
  216. @function_runtime
  217. def parsebondfile(bondfilepath, cutoff=0.3,**kwargs):
  218.     print(f"bond order cutoff {cutoff} is used")
  219.     #10 times faster than version 1
  220.    
  221.     #-------keyward arguments-------------
  222.     bo              = kwargs.get('bo',False) # to get bondorders
  223.     mtypes          = kwargs.get('mtypes',False) # to get molecule ids
  224.     charge          = kwargs.get('charge',False) # get charge of atoms
  225.     abo             = kwargs.get('abo',False)
  226.     nlp             = kwargs.get('nlp',False) # get the number of lone pair
  227.     mols            = kwargs.get('mols',False) # get mtypes wise molecues
  228.     ALL             = kwargs.get('ALL',False) # get everything
  229.     pkl             = kwargs.get('pkl',None) # Fatser the process (directory)
  230.     firstStep_only = kwargs.get('firstStep_only',False) # first step only
  231.     #-------------------------------------
  232.     if firstStep_only:
  233.         pkl = None
  234.         print('If firstStep_only is True, pkl automatically set to None')
  235.        
  236.     if pkl is not None:
  237.         ALL = True
  238.         print('If pkl is not None, ALL automatically set to True')
  239.     #-------------------------------------
  240.    
  241.    
  242.     def parse():
  243.         bonddata = {}
  244.         neighbours = {}
  245.         atomtypes  = {}
  246.         # bondorders[step][atom_id1][atom_id2]=bo between atom1 and atom2
  247.         if bo or ALL: bondorders    = {}
  248.         if mtypes or ALL: molecule_types    = {}
  249.         if charge or ALL: charges = {}
  250.         if nlp or ALL: NLP = {}
  251.         if abo or ALL: ABO = {}
  252.         if mols or ALL: molecules = {}
  253.         if firstStep_only:
  254.             fs_flag = False
  255.        
  256.         with open(bondfilepath) as bf:
  257.             prev_natoms = 0
  258.             natoms_flag = False
  259.             warning_flag = False
  260.             first_warning_ignore = True
  261.             atom_counter = 0
  262.            
  263.             for line in bf:
  264.                 splitted = line.split()
  265.                 if line.find('Timestep')!=-1:
  266.                     step = int(splitted[-1])
  267.                    
  268.                     if firstStep_only:
  269.                         if fs_flag:
  270.                             break
  271.                         fs_flag = True
  272.                        
  273.                     if bo or ALL: bondorders[step]  = {}
  274.                     if charge or ALL: charges[step] = {}
  275.                     if nlp or ALL: NLP[step] = {}
  276.                     if abo or ALL: ABO[step] = {}
  277.                    
  278.                     neighbours[step]={}
  279.                     atomtypes = {}
  280.                    
  281.                 if line.find('Number of particles')!=-1:
  282.                     current_natoms = int(splitted[-1])
  283.                    
  284.                     if atom_counter!=current_natoms:
  285.                         if first_warning_ignore:
  286.                             first_warning_ignore=False
  287.                         else:
  288.                             warning_flag=True
  289.                        
  290.                     if natoms_flag and current_natoms!=prev_natoms:
  291.                         print('User warning from get_neighbours function: Lost atom warning!!')
  292.                     atom_counter = 0  
  293.                     prev_natoms  = current_natoms #new change
  294.                    
  295.                 if splitted != [] and splitted[0].isnumeric():
  296.                     atom_counter       +=1
  297.                     parent             = int(splitted[0])
  298.                     parent_type        = int(splitted[1])
  299.                     number_of_children = int(splitted[2])
  300.                     children           = list(map(int,splitted[3:3+number_of_children]))
  301.                    
  302.                     # bond orders
  303.                     bo_id1 = 4+number_of_children
  304.                     bo_idn = bo_id1+number_of_children
  305.                     bo_values = list(map(float,splitted[bo_id1:bo_idn]))
  306.                    
  307.                     ## cutoffs
  308.                     updated_children = []
  309.                     for child,bond_order in zip(children,bo_values):
  310.                         if bond_order>=cutoff:
  311.                             updated_children.append(child)
  312.                    
  313.                     ## assigning values
  314.                     neighbours[step][parent] = updated_children
  315.                     atomtypes[parent]        = parent_type
  316.                    
  317.                     if mtypes or ALL:
  318.                         molecule_types[parent] = int(splitted[3+number_of_children])
  319.                     if bo or ALL:
  320.                         bondorders[step][parent]={}
  321.                         for i,child in enumerate(children):
  322.                             bondorders[step][parent][child] = bo_values[i]
  323.                        
  324.                     if charge or ALL:
  325.                         charges[step][parent] = float(splitted[-1])
  326.                     if nlp or ALL:
  327.                         NLP[step][parent] = float(splitted[-2])
  328.                     if abo or ALL:
  329.                         ABO[step][parent] = float(splitted[-3])
  330.                     if mols or ALL:
  331.                         mol_type = int(splitted[3+number_of_children])
  332.                         if mol_type in molecules.keys():
  333.                             molecules[mol_type].add(parent)
  334.                         else:
  335.                             molecules[mol_type] = set()
  336.            
  337.             if warning_flag:
  338.                 print('User warning from get_neighbours function: Repeated atom information detected in some timesteps!')
  339.                
  340.             bonddata['neighbours'] = neighbours
  341.             bonddata['atypes'] = atomtypes
  342.             if bo or ALL: bonddata['bondorders']  = bondorders
  343.             if mtypes or ALL: bonddata['mtypes']  = molecule_types
  344.             if charge or ALL: bonddata['charge']  = charges
  345.             if nlp or ALL: bonddata['nlp'] = NLP
  346.             if abo or ALL: bonddata['abo'] = ABO
  347.             if mols or ALL: bonddata['molecules'] = molecules
  348.            
  349.             if firstStep_only:
  350.                 for key, data in bonddata.items():
  351.                     if key in ['neighbours','bondorders']:
  352.                         for step, info in data.items():
  353.                             bonddata[key]=info
  354.                 bonddata['fstep'] = step
  355.            
  356.             return bonddata
  357.    
  358.     if pkl is not None:
  359.         if pkl == 'yes':
  360.             path   = path = bondfilepath[:bondfilepath.rfind('.')]+'.pickle'
  361.         else:
  362.             path = pkl
  363.            
  364.         if os.path.exists(path):
  365.             # Load data from pickle
  366.             print('Loading data from pickle...')
  367.             with open(path, 'rb') as pf:
  368.                 result = pickle.load(pf)
  369.         else:
  370.             # Execute the provided function to get the data
  371.             print('No pickle file exist. Data is dumping for further use.')
  372.             result = parse()
  373.             with open(path, 'wb') as pf:
  374.                 pickle.dump(result, pf)
  375.    
  376.     else:
  377.         result = parse()
  378.        
  379.     return result
  380.  
  381. ## trying to incorporate cutoffs between atom_types
  382. @function_runtime
  383. def mod_parsebondfile(bondfilepath,*args, cutoff=0.3,**kwargs):
  384.     print(f"bond order cutoff {cutoff} is used")
  385.     #10 times faster than version 1
  386.    
  387.     #-------keyward arguments-------------
  388.     bo              = kwargs.get('bo',False) # to get bondorders
  389.     mtypes          = kwargs.get('mtypes',False) # to get molecule ids
  390.     charge          = kwargs.get('charge',False) # get charge of atoms
  391.     nlp             = kwargs.get('nlp',False) # get the number of lone pair
  392.     mols            = kwargs.get('mols',False) # get mtypes wise molecues
  393.     ALL             = kwargs.get('ALL',False) # get everything
  394.     pkl             = kwargs.get('pkl',None) # Fatser the process (directory)
  395.     firstStep_only = kwargs.get('firstStep_only',False) # first step only
  396.     #-------------------------------------
  397.     if firstStep_only:
  398.         pkl = None
  399.         print('If firstStep_only is True, pkl automatically set to None')
  400.        
  401.     if pkl is not None:
  402.         ALL = True
  403.         print('If pkl is not None, ALL automatically set to True')
  404.     #-------------------------------------
  405.    
  406.     #--------args parameters-------
  407.     # arg--> [atom_type1,atom_type2,cutoff]
  408.     cutoff_dict = {}
  409.     for arg in args:
  410.         cutoff_dict[tuple(sorted((arg[0],arg[1])))] = arg[2]
  411.    
  412.    
  413.     ## getting atom types by reading only time first timestep
  414.     ## this atom id is required to set cutoff for different atom types
  415.     with open(bondfilepath) as bf:    
  416.         atomtypes   = {}
  417.         step_count  = 0
  418.         for line in bf:
  419.             splitted = line.split()
  420.             if line.find('Timestep')!=-1:
  421.                 step_count+=1
  422.             if step_count>1:
  423.                 break
  424.             if splitted != [] and splitted[0].isnumeric():
  425.                 atomid      = int(splitted[0])
  426.                 atom_type   = int(splitted[1])      
  427.                 atomtypes[atomid] = atom_type
  428.                
  429.                
  430.    
  431.    
  432.     def parse():
  433.         bonddata = {}
  434.         neighbours = {}
  435.         # bondorders[step][atom_id1][atom_id2]=bo between atom1 and atom2
  436.         if bo or ALL: bondorders    = {}
  437.         if mtypes or ALL: molecule_types    = {}
  438.         if charge or ALL: charges = {}
  439.         if nlp or ALL: NLP = {}
  440.         if mols or ALL: molecules = {}
  441.         if firstStep_only:
  442.             fs_flag = False
  443.        
  444.         with open(bondfilepath) as bf:
  445.             prev_natoms = 0
  446.             natoms_flag = False
  447.             warning_flag = False
  448.             first_warning_ignore = True
  449.             atom_counter = 0
  450.            
  451.             for line in bf:
  452.                 splitted = line.split()
  453.                 if line.find('Timestep')!=-1:
  454.                     step = int(splitted[-1])
  455.                    
  456.                     if firstStep_only:
  457.                         if fs_flag:
  458.                             break
  459.                         fs_flag = True
  460.                        
  461.                     if bo or ALL: bondorders[step]  = {}
  462.                     if charge or ALL: charges[step] = {}
  463.                     if nlp or ALL: NLP[step] = {}
  464.                    
  465.                     neighbours[step]={}
  466.                    
  467.                 if line.find('Number of particles')!=-1:
  468.                     current_natoms = int(splitted[-1])
  469.                    
  470.                     if atom_counter!=current_natoms:
  471.                         if first_warning_ignore:
  472.                             first_warning_ignore=False
  473.                         else:
  474.                             warning_flag=True
  475.                        
  476.                     if natoms_flag and current_natoms!=prev_natoms:
  477.                         print('User warning from get_neighbours function: Lost atom warning!!')
  478.                     atom_counter = 0  
  479.                     prev_natoms  = current_natoms #new change
  480.                    
  481.                 if splitted != [] and splitted[0].isnumeric():
  482.                     atom_counter       +=1
  483.                     parent             = int(splitted[0])
  484.                     parent_type        = int(splitted[1])
  485.                     number_of_children = int(splitted[2])
  486.                     children           = list(map(int,splitted[3:3+number_of_children]))
  487.                    
  488.                     # bond orders
  489.                     bo_id1 = 4+number_of_children
  490.                     bo_idn = bo_id1+number_of_children
  491.                     bo_values = list(map(float,splitted[bo_id1:bo_idn]))
  492.                    
  493.                     ## cutoffs
  494.                     updated_children = []
  495.                     for child,bond_order in zip(children,bo_values):
  496.                         if bond_order>=cutoff:
  497.                             updated_children.append(child)
  498.                    
  499.                     ## assigning values
  500.                     neighbours[step][parent] = updated_children
  501.                    
  502.                     if mtypes or ALL:
  503.                         molecule_types[parent] = int(splitted[3+number_of_children])
  504.                     if bo or ALL:
  505.                         bondorders[step][parent]={}
  506.                         for i,child in enumerate(children):
  507.                             bondorders[step][parent][child] = bo_values[i]
  508.                        
  509.                     if charge or ALL:
  510.                         charges[step][parent] = float(splitted[-1])
  511.                     if nlp or ALL:
  512.                         NLP[step][parent] = float(splitted[-2])
  513.                     if mols or ALL:
  514.                         mol_type = int(splitted[3+number_of_children])
  515.                         if mol_type in molecules.keys():
  516.                             molecules[mol_type].add(parent)
  517.                         else:
  518.                             molecules[mol_type] = set()
  519.            
  520.             if warning_flag:
  521.                 print('User warning from get_neighbours function: Repeated atom information detected in some timesteps!')
  522.                
  523.             bonddata['neighbours'] = neighbours
  524.             bonddata['atypes'] = atomtypes
  525.             if bo or ALL: bonddata['bondorders']  = bondorders
  526.             if mtypes or ALL: bonddata['mtypes']  = molecule_types
  527.             if charge or ALL: bonddata['charge']  = charges
  528.             if nlp or ALL: bonddata['nlp'] = NLP
  529.             if mols or ALL: bonddata['molecules'] = molecules
  530.            
  531.             if firstStep_only:
  532.                 for key, data in bonddata.items():
  533.                     if key in ['neighbours','bondorders']:
  534.                         for step, info in data.items():
  535.                             bonddata[key]=info
  536.                 bonddata['fstep'] = step
  537.            
  538.             return bonddata
  539.    
  540.     if pkl is not None:
  541.         if pkl == 'yes':
  542.             path   = path = bondfilepath[:bondfilepath.rfind('.')]+'.pickle'
  543.         else:
  544.             path = pkl
  545.            
  546.         if os.path.exists(path):
  547.             # Load data from pickle
  548.             print('Loading data from pickle...')
  549.             with open(path, 'rb') as pf:
  550.                 result = pickle.load(pf)
  551.         else:
  552.             # Execute the provided function to get the data
  553.             print('No pickle file exist. Data is dumping for further use.')
  554.             result = parse()
  555.             with open(path, 'wb') as pf:
  556.                 pickle.dump(result, pf)
  557.    
  558.     else:
  559.         result = parse()
  560.        
  561.     return result
  562.  
  563. @function_runtime
  564. def parsebondfile_asGraph(bondfilepath):
  565.     atomConnectivity = {} ## dict of nx graphs
  566.            
  567.     with open(bondfilepath) as bf:
  568.         prev_natoms = 0
  569.         natoms_flag = False
  570.         repeated_atom_warning_flag = False
  571.         first_warning_ignore = True
  572.         atom_counter = 0
  573.         hashline_flag = False
  574.        
  575.         ## predefining is necessary
  576.         step = -1
  577.         atomConnectGraph = nx.Graph()
  578.        
  579.         for line in bf:
  580.             splitted = line.split()
  581.            
  582.             if hashline_flag and 'Timestep' in splitted:
  583.                 if not nx.is_empty(atomConnectGraph):
  584.                     atomConnectivity[step]=atomConnectGraph
  585.            
  586.             if 'Timestep' in line:
  587.                 step             = int(splitted[-1])
  588.                 atomConnectGraph = nx.Graph()
  589.                
  590.                
  591.             if line.find('Number of particles')!=-1:
  592.                 current_natoms = int(splitted[-1])
  593.                
  594.                 if atom_counter!=current_natoms:
  595.                     if first_warning_ignore:
  596.                         first_warning_ignore=False
  597.                     else:
  598.                         repeated_atom_warning_flag=True
  599.                    
  600.                 if natoms_flag and current_natoms!=prev_natoms:
  601.                     print('User warning from get_neighbours function: Lost atom warning!!')
  602.                 atom_counter = 0  
  603.                 prev_natoms  = current_natoms #new change
  604.                
  605.             if splitted != [] and splitted[0].isnumeric():
  606.                 atom_counter    +=1
  607.                 parent           = int(splitted[0])
  608.                 parent_type      = int(splitted[1])
  609.                 n_children       = int(splitted[2])
  610.                 children         = list(map(int,splitted[3:3+n_children]))
  611.                
  612.                 bo_id = 4+n_children
  613.                 bo_values = list(map(float,splitted[bo_id:bo_id+n_children]))
  614.                
  615.                 atomConnectGraph.add_node(parent, atom_type=parent_type)
  616.                 for child, bo in zip(children,bo_values):
  617.                     atomConnectGraph.add_edge(parent, child, bond_order=bo)
  618.  
  619.             if len(splitted)==1 and splitted[0]=='#':
  620.                 hashline_flag = True
  621.             else:
  622.                 hashline_flag = False
  623.                    
  624.         if repeated_atom_warning_flag:
  625.             print('User warning from get_neighbours function: Repeated atom information detected in some timesteps!')
  626.    
  627.     return atomConnectivity
  628.            
  629.  
  630. @function_runtime
  631. def merge_bondfiles(*args,**kwargs):
  632.     # input: bondfilepath_1, start_ts_1, end_ts_1,
  633.     #       bondfilepath_2, start_ts_2, end_ts_2, ...., **kwargs
  634.    
  635.     #-------keyward arguments-------------
  636.     file     = kwargs.get('file','merge_bonds.reaxc')
  637.     # nevery   = kwargs.get('nevery',None)
  638.     #-------------------------------------
  639.    
  640.     #------arguments----------------------
  641.     paths, start_ts, end_ts = [], [], []
  642.     for i,arg in enumerate(args):
  643.         if i%3==0:
  644.             paths.append(arg)
  645.         elif i%3==1:
  646.             start_ts.append(arg)
  647.         else:
  648.             end_ts.append(arg)
  649.     #-------------------------------------
  650.     with open(file,'w') as mf:
  651.         for i, bondfilepath in enumerate(paths):
  652.             what_todo = 'do nothing' # flag for start writing
  653.             with open(bondfilepath,'r') as bf:
  654.                 sts = start_ts[i]
  655.                 ets = end_ts[i]
  656.                
  657.                 for line in bf:
  658.                     if line.find('Timestep')!=-1:
  659.                         cts = int(line.split()[-1]) # current timestep
  660.                         if cts == sts:
  661.                             what_todo = 'start writing'
  662.                         if cts == ets:
  663.                             what_todo = 'stop writing & break'
  664.                    
  665.                     if what_todo == 'start writing':
  666.                         mf.write(line)
  667.                     if what_todo == 'stop writing & break':
  668.                         break
  669.                
  670.  
  671.  
  672. def merge_bondfiles_OLD(*args,**kwargs):
  673.    
  674.     #-------keyward arguments-------------
  675.     file = kwargs.get('file','marge_bonds.reaxc')
  676.     #-------------------------------------
  677.    
  678.     startline = []
  679.     stopline  = []
  680.     for index,arg in enumerate(args):
  681.         skip_first = True
  682.         with open(arg,'r') as bf:
  683.             for line in bf:
  684.                 if index == 0 and line.find('Timestep')!=-1:
  685.                     startline.append(line)
  686.                     break
  687.                 if line.find('Timestep')!=-1:
  688.                     if skip_first:
  689.                         skip_first = False
  690.                         continue
  691.                     startline.append(line)
  692.                     break
  693.                
  694.     for i in range(len(startline)-1):
  695.         stopline.append(startline[i+1])
  696.     stopline.append(-1) # till end
  697.    
  698.     with open(file,'w') as mf:
  699.         for index,arg in enumerate(args):
  700.             start_writing = False
  701.             with open(arg,'r') as bf:
  702.                 for line in bf:
  703.                     if line == stopline[index] and stopline[index]!=-1:
  704.                         break
  705.                     if not line.endswith('\n'):
  706.                         line+='\n'
  707.                     if start_writing:
  708.                         mf.write(line)
  709.                     if line == startline[index]:
  710.                         if line.find('Timestep')!=-1:
  711.                             mf.write(line)
  712.                             start_writing = True
  713.     print('Merged bond file created!')
  714.  
  715. #--------------Is equal two text file -----------------------------------------                          
  716. def text_isequal(textfile1,textfile2,strip=False):
  717.     result = True
  718.     with open(textfile1,'r') as tf1, open(textfile2,'r') as tf2:
  719.         for i,lines in enumerate(zip(tf1,tf2)):
  720.             line1,line2=lines
  721.             if strip:
  722.                 line1 = line1.strip()
  723.                 line2 = line2.strip()
  724.             if line1!=line2:
  725.                 result = False
  726.                 print('Mismatched:', 'line',i+1)
  727.     return result
  728.    
  729.    
  730. # ---------------------get molecule from list neighbour------------------------------
  731. def get_molecules(neigh):
  732.     '''
  733.    Parameters
  734.    ----------
  735.    neigh : Dictionary
  736.        DESCRIPTION.
  737.        neighbours of a single timestep or neighbours[step]
  738.  
  739.    Returns
  740.    -------
  741.    molecules : Set
  742.        DESCRIPTION.
  743.    '''
  744.     graph = nx.Graph(neigh)
  745.     molecules = nx.connected_components(graph)    
  746.     return molecules
  747.  
  748. # -----------------------get chemical formula-------------------------------------
  749. def get_molecular_formula(molecule,atomtypes,atomsymbols):
  750.     '''
  751.    Parameters
  752.    ----------
  753.    molecule : any itterable
  754.        DESCRIPTION.
  755.    atomtypes : TYPE
  756.        DESCRIPTION.
  757.    atomsymbols : TYPE, optional
  758.        DESCRIPTION. The default is 'HCO'.
  759.  
  760.    Returns
  761.    -------
  762.    formula : TYPE
  763.        DESCRIPTION.
  764.  
  765.    '''
  766.     species = map(lambda x: atomtypes[x],molecule)
  767.     counter = [0]*len(atomsymbols)
  768.     for s in species:
  769.         counter[s-1]+=1
  770.    
  771.     formula = ''
  772.     for i in range(len(atomsymbols)):
  773.         if counter[i]!=0:
  774.             if counter[i]!=1:
  775.                 formula += atomsymbols[i]+str(counter[i])
  776.             else:
  777.                 formula += atomsymbols[i]
  778.     return formula
  779.  
  780. # ------------Get All Speceis Count At every Timestep--------
  781. @function_runtime
  782. def get_SpeciesCountAtEveryTimestep(neighbours,atomtypes,atomsymbols,exception=[],step2ps=False,step2psargs=[],minps=-math.inf,maxps=math.inf):
  783.     '''
  784.    Parameters
  785.    ----------
  786.    neighbours : Dictionary
  787.        DESCRIPTION.
  788.    atomtypes : Dictionary
  789.        DESCRIPTION.
  790.    atomsymbols : any iterable
  791.        DESCRIPTION.
  792.    exception : list (any iterable), optional
  793.        DESCRIPTION. The default is [].
  794.    step2ps : bool, optional
  795.        DESCRIPTION. The default is False.
  796.    step2psargs : list (any iterable), optional
  797.        DESCRIPTION. The default is [].
  798.    minps : TYPE, optional
  799.        DESCRIPTION. The default is -math.inf.
  800.    maxps : TYPE, optional
  801.        DESCRIPTION. The default is math.inf.
  802.  
  803.    Returns
  804.    -------
  805.    stepwise_species_count : Dictionary
  806.        DESCRIPTION.
  807.  
  808.    '''
  809.     #local function
  810.     def get_species(molecules,atomtypes,atomsymbols): #take the molecule file from get_molecute()
  811.         #atomlist=['H','C','O'], same order as TYPE
  812.         allspecies = []
  813.         for molecule in molecules:
  814.             species = get_molecular_formula(molecule, atomtypes,atomsymbols)
  815.             allspecies.append(species)
  816.         allspecies_count = Counter(allspecies)
  817.        
  818.         return allspecies_count
  819.  
  820.     ## if step2ps=True --> Convert timesteps to piccoseconds ##
  821.     _steps  = list(neighbours.keys())
  822.     s2ps = _steps.copy()
  823.     s2ps_dict = {}
  824.    
  825.     if step2ps:
  826.         if not step2psargs:
  827.             sys.exit('Value error: \'step2psargs\' is empty')
  828.         del s2ps
  829.         s2ps = step2picosecond(_steps, *step2psargs)
  830.    
  831.     s2ps_dict = dict(zip(_steps,s2ps))
  832.     ##-------------------------------------------------------##
  833.    
  834.     stepwise_species_count = {}
  835.     for step,neigh in neighbours.items():
  836.         molecules = get_molecules(neigh)
  837.         species_count = get_species(molecules, atomtypes,atomsymbols)
  838.        
  839.         ## Delete species from the Exception list ##
  840.         for ex in exception: del species_count[ex]
  841.         ##------------------------------------------##
  842.         if minps<=s2ps_dict[step]<maxps:
  843.             stepwise_species_count[s2ps_dict[step]]=species_count
  844.                
  845.     return stepwise_species_count    
  846.  
  847. @function_runtime
  848. def stepwise_species_count(neighbours,atomtypes,atomsymbols,step2ps=None):
  849.     # if requires ps instead of steps set step2ps = timestep
  850.     _swsc = {} # stepwise specis count _swsc[step][species]=count
  851.     if step2ps:
  852.         steps = list(neighbours.keys())
  853.         ps    = step2picosecond(steps, step2ps)
  854.     for i,items in enumerate(neighbours.items()):
  855.         step,neigh = items
  856.         molecules = get_molecules(neigh)
  857.         _sc = {} # species count
  858.         for molecule in molecules:
  859.             species = get_molecular_formula(molecule, atomtypes,atomsymbols)
  860.             if species in _sc.keys():
  861.                 _sc[species]+=1
  862.             else:
  863.                 _sc[species] = 1
  864.                
  865.         if step2ps:
  866.             _swsc[ps[i]]=_sc
  867.         else:
  868.             _swsc[step]=_sc
  869.     return _swsc    
  870.  
  871. @function_runtime
  872. def stepwise_species_count_v2(atomtypes,atomsymbols,*args,**kwargs):
  873.     # if requires ps instead of steps set step2ps = timestep
  874.     step2ps = kwargs.get('step2ps',None)
  875.     kind    = kwargs.get('kind','sum')
  876.    
  877.     def inner(neighbours):
  878.         # if requires ps instead of steps set step2ps = timestep
  879.         _swsc = {} # stepwise specis count _swsc[step][species]=count
  880.         if step2ps:
  881.             steps = list(neighbours.keys())
  882.             ps    = step2picosecond(steps, step2ps)
  883.         for i,items in enumerate(neighbours.items()):
  884.             step,neigh = items
  885.             molecules = get_molecules(neigh)
  886.             _sc = {} # species count
  887.             for molecule in molecules:
  888.                 species = get_molecular_formula(molecule, atomtypes,atomsymbols)
  889.                 if species in _sc.keys():
  890.                     _sc[species]+=1
  891.                 else:
  892.                     _sc[species] = 1
  893.                    
  894.             if step2ps:
  895.                 _swsc[ps[i]]=_sc
  896.             else:
  897.                 _swsc[step]=_sc
  898.         return _swsc
  899.    
  900.     result = []
  901.     for arg in args:
  902.         result.append(inner(arg))
  903.    
  904.     output = {}
  905.     for out in result:
  906.         for step, species_count in out.items():
  907.             if step not in output.keys():
  908.                 output[step]={}
  909.             for species,count in species_count.items():
  910.                 if species not in output[step].keys():
  911.                     output[step][species] = count
  912.                 else:
  913.                     output[step][species] +=count
  914.     if kind=='sum':
  915.         pass
  916.     elif kind=='mean':
  917.         n = len(args)
  918.         print(n)
  919.         for step, species_count in output.items():
  920.             for species,count in species_count.items():
  921.                     cc = int(count/n)
  922.                     output[step][species] = cc+1 if cc!=count/n else cc
  923.     else:
  924.         print('User Error from stepwise_species_count: kind key error')
  925.     return output
  926.        
  927.  
  928. # ---------------------Expression Selector----------------------------------------
  929. def expression_selector(neighbours,atomtypes,file='expression_selector.txt',atomsybols='HCO',heading=None):
  930.     steps = neighbours.keys()
  931.     with open(file, 'w') as f:
  932.         if heading:
  933.             f.write(heading+'\n'+'-------------------------------'+'\n')
  934.         for step in steps:
  935.             molecules = get_molecules(neighbours[step])
  936.             f.write('Timestep='+str(step)+'\n')
  937.            
  938.             for molecule in molecules:
  939.                 formula = get_molecular_formula(molecule, atomtypes,atomsybols)
  940.                 f.write('Timestep='+str(step)+'\t')
  941.                 f.write('Molecule: '+formula+'\n')
  942.                 f.write('Molecule: '+str(molecule)+'\n')
  943.                 for atom in molecule:
  944.                     f.write('ParticleIdentifier=='+str(atom)+'|| ')
  945.                 f.write('\n\n')
  946.                
  947. # ---------------Pathway Tracker Of a singler molecule---------------------------
  948. def pathway_tracker(seek_molecule,neighbours,atomtypes,file='pathway_tracker.txt',atomsybols='HCO'):
  949.     seek_molecule_formula = get_molecular_formula(seek_molecule, atomtypes)
  950.     steps = neighbours.keys()
  951.     file = '{}'.format(seek_molecule)+seek_molecule_formula+'_'+file
  952.    
  953.     with open(file, 'w') as f:
  954.         f.write('This is a pathway tracker for {}'.format(seek_molecule_formula)+'\n')
  955.         f.write('{}'.format(seek_molecule))
  956.         f.write('-----------------------------------------------------------------\n\n')
  957.         pathway = []
  958.         for step in steps:
  959.             molecules = get_molecules(neighbours[step])
  960.             for molecule in molecules:
  961.                 formula = get_molecular_formula(molecule,atomtypes,'HCO')
  962.                 intersection = set(seek_molecule) & set(molecule)
  963.                 if intersection and formula not in pathway:
  964.                     pathway.append(formula)
  965.                     f.write('Timestep='+str(step)+'\t')
  966.                     f.write('Molecule: '+formula+'\n')
  967.                     for atom in molecule:
  968.                         f.write('ParticleIdentifier=='+str(atom)+'|| ')
  969.                        
  970.                     f.write('\n{}\n\n'.format(molecule))
  971.     return pathway
  972.  
  973. # --------------------------------------------------------------
  974. def step2picosecond(steps,*args):
  975.     timestep =[]
  976.     if len(args)==1:
  977.         tstep1, = args
  978.         current_time = steps[0]*tstep1/1000
  979.         previous_step = steps[0]
  980.        
  981.         for current_step in steps:
  982.             current_time += (current_step-previous_step)*tstep1/1000  
  983.             previous_step = current_step
  984.             timestep.append(current_time)
  985.         return timestep
  986.    
  987.     elif len(args)==3:
  988.         tstep1,tstep2,limit = args
  989.         current_time = steps[0]*tstep1/1000
  990.         previous_step = steps[0]
  991.        
  992.         for current_step in steps:
  993.             if current_step<=limit:
  994.                 current_time += (current_step-previous_step)*tstep1/1000
  995.             else:        
  996.                 current_time += (current_step-previous_step)*tstep2/1000
  997.             previous_step = current_step
  998.             timestep.append(current_time)
  999.         return timestep
  1000.     else:
  1001.         sys.exit('Error: The length of the argument in the step2picosecond() function is not specified correctly.\n')
  1002.  
  1003.  
  1004. # -----------getting index of specific timestep---------------
  1005. def get_nearestindex(_timestep,*args,**kwargs):
  1006.     if len(args)==1:
  1007.         _time = args[0]
  1008.     key = kwargs.get('key',None) # posible value: 'first','last'
  1009.     if key=='first':
  1010.         index = 0
  1011.     elif key=='last':
  1012.         index = len(_timestep)-1
  1013.     elif key==None:
  1014.         _temp = [abs(_value-_time) for _value in _timestep]
  1015.         _mini = min(_temp)
  1016.         index = _temp.index(_mini)
  1017.     else:
  1018.         sys.exit('User Error from get_nearestindex: Key not found!')
  1019.     return index
  1020.  
  1021.  
  1022. # -----------sort chemical formula in this order: C,H,O---------------
  1023. def sort_molecular_formula(molecular_formula,order=['C','H','O']):
  1024.     """
  1025.    Sorts the elements in a molecular formula (or a list of formulas) according
  1026.    to a specified order.
  1027.    
  1028.    Parameters:
  1029.    molecular_formula (str or iterable of str): The molecular formula(s)
  1030.                                                to be sorted.
  1031.    order (list of str): The order in which elements should be sorted within
  1032.                         the formula. Default is ['C', 'H', 'O'].
  1033.  
  1034.    Returns:
  1035.    str or list of str: Sorted molecular formula(s).
  1036.    """
  1037.    
  1038.     def do_sort(species):
  1039.         """
  1040.        Helper function to sort a single molecular formula.
  1041.        """
  1042.         item = re.findall('[A-Z][a-z]?|\d+|.', species)+['']
  1043.         elements = []
  1044.         for i in range(len(item)-1):
  1045.             if item[i].isalpha() and item[i+1].isnumeric():
  1046.                 elements.append((item[i],item[i+1]))
  1047.             elif item[i].isalpha() and not item[i+1].isnumeric():
  1048.                 elements.append((item[i],''))
  1049.                
  1050.         # Check if all elements are in the specified order list
  1051.         symbols = [x for x, _ in elements]
  1052.         if not set(symbols).issubset(set(order)):
  1053.             raise ValueError('Some elements are not in the order list! '
  1054.                              'Please specify the order list accordingly.')
  1055.        
  1056.         for i in range(len(elements)):
  1057.             elements[i]+=(order.index(elements[i][0]),)
  1058.         elements.sort(key=lambda x:x[2])
  1059.         sorted_chem_formula = ''
  1060.         for element in elements:
  1061.             sorted_chem_formula+=element[0]+element[1]
  1062.         return sorted_chem_formula
  1063.          
  1064.    
  1065.     # Handle both single string and iterable of strings cases
  1066.     if isinstance(molecular_formula, str):
  1067.         # molecular_formula is a single string
  1068.         result = do_sort(molecular_formula)
  1069.     else:
  1070.         try:
  1071.             iterator = iter(molecular_formula)
  1072.             if all(isinstance(item, str) for item in iterator):
  1073.                 # Input is an iterable of strings
  1074.                 result = []
  1075.                 for species in molecular_formula:
  1076.                     result.append(do_sort(species))
  1077.             else:
  1078.                 raise TypeError("Iterable must contain only strings")
  1079.                
  1080.         except TypeError:
  1081.             # Input is neither a single string nor an iterable of strings
  1082.             raise TypeError("Input must be a string or an iterable of strings")
  1083.        
  1084.     return result
  1085.    
  1086. # ------------------make_latex-----------------------------------------
  1087. def make_molecular_formula_latex(molecular_formula,sort=False):
  1088.     """
  1089.    Converts a molecular formula or a list of formulas to LaTeX format.
  1090.  
  1091.    Parameters:
  1092.    molecular_formula (str or iterable of str): The molecular formula(s)
  1093.                                                to be converted.
  1094.    sort (bool): Whether to sort the elements in the formula(s) according to
  1095.                 a predefined order.
  1096.  
  1097.    Returns:
  1098.    str or list of str: Molecular formula(s) in LaTeX format.
  1099.    """
  1100.    
  1101.     if sort:
  1102.         molecular_formula = sort_molecular_formula(molecular_formula)
  1103.    
  1104.     if isinstance(molecular_formula, str):
  1105.         formula = [molecular_formula]
  1106.     else:
  1107.         formula = molecular_formula.copy()
  1108.    
  1109.    
  1110.    
  1111.     latex_formula = ['']*len(formula)
  1112.     for i in range(len(formula)):
  1113.         formula[i]+='$'
  1114.         for j in range(len(formula[i])-1):
  1115.             if formula[i][j].isalpha() and formula[i][j+1].isnumeric():
  1116.                 latex_formula[i]+=formula[i][j] + '_{'
  1117.             elif formula[i][j].isnumeric() and not formula[i][j+1].isnumeric():
  1118.                 latex_formula[i]+=formula[i][j] + '}'
  1119.             else:
  1120.                 latex_formula[i]+=formula[i][j]
  1121.         latex_formula[i] = '$'+latex_formula[i]+'$'
  1122.    
  1123.     if len(latex_formula)==1:
  1124.         return latex_formula[0]
  1125.     else:
  1126.         return latex_formula
  1127.  
  1128. # --------------------------------------------------------------
  1129. def compute_molecular_weight(species,exclude=[]):
  1130.     atomic_info    = atomic_weight(key='all')
  1131.     item = re.findall('[A-Z][a-z]?|\d+|.', species)
  1132.     item.append('q') #fake
  1133.     molecular_weight = 0.0
  1134.     for i in range(len(item)-1):
  1135.         now,nxt = item[i],item[i+1]
  1136.         if now in exclude:
  1137.             continue
  1138.         if now.isalpha() and nxt.isalpha():
  1139.             if now in atomic_info:
  1140.                 molecular_weight+=atomic_info[now]
  1141.             else:
  1142.                 sys.exit('Error: No such element \'{}\' in the compute_molecular_weight() function'.format(now))
  1143.         elif now.isalpha() and nxt.isnumeric():
  1144.             if now in atomic_info:
  1145.                 molecular_weight+=atomic_info[now]*float(nxt)
  1146.             else:
  1147.                 sys.exit('Error: No such element \'{}\' in the compute_molecular_weight() function'.format(now))
  1148.     return molecular_weight
  1149.  
  1150. # ----------------------Heatmap of Species--------------------------------
  1151. @function_runtime
  1152. def plot_species_heatmap(atomtypes,atomsymbols,*neighbours_list,**kwargs):
  1153.     # keyward agruments:
  1154.         # nspecies (int)   = number of species to be plot (default value = 20)
  1155.         # ts (int)         = timestep (default value = 0.25)
  1156.         # savedir (string) = directory to save (default value = '')
  1157.         # titile (string)  = title to be shown on the top (default value = '')
  1158.         # order (list)     = order of the species. Takes list of species (default = mean abundance)
  1159.         # pikle (string)   = load from pickle. It takes pickle_path (default = None)
  1160.         # skipts (float)   = skip some data from the first (default = None)
  1161.         # topspec (list)   = species that appears on the top (default = None)
  1162.         #
  1163.        
  1164.     nspecies    = kwargs.get('nspecies', 20)
  1165.     ts          = kwargs.get('ts', 0.25)
  1166.     savedir     = kwargs.get('savedir', '')
  1167.     title       = kwargs.get('title', '')
  1168.     order       = kwargs.get('order',None)
  1169.     pickle      = kwargs.get('pickle',None)
  1170.     skipts      = kwargs.get('skipts',None) # skip timestep
  1171.     topspec     = kwargs.get('topspec',None)
  1172.     ignor       = kwargs.get('ignor',None)
  1173.     kind        = kwargs.get('kind','sum') # kind = sum,mean
  1174.     log         = kwargs.get('log',True)
  1175.     exclude     = kwargs.get('exclude',None)
  1176.     fontsize    = kwargs.get('fontsize', 12)
  1177.     figsize     = kwargs.get('figsize',(15,8))
  1178.    
  1179.     if pickle:
  1180.         data = loadpickle_or_execute(pickle, stepwise_species_count_v2, atomtypes, atomsymbols, *neighbours_list, step2ps=ts)
  1181.     else:
  1182.         data = stepwise_species_count_v2(atomtypes, atomsymbols,*neighbours_list,step2ps=ts,kind=kind)
  1183.    
  1184.     if log:
  1185.         df = pd.DataFrame(data).fillna(1)
  1186.     else:
  1187.         df = pd.DataFrame(data).fillna(0)
  1188.    
  1189.     if order:
  1190.         df = df.loc[order,:]
  1191.     else:
  1192.         df.loc[:,'sum'] = df.sum(axis=1)
  1193.         df = df.sort_values(by='sum',ascending=False)
  1194.         df = df.drop(['sum'],axis=1)
  1195.        
  1196.     #############-exclude-##################
  1197.     if exclude is not None:
  1198.         df = df.drop(index=exclude)
  1199.     ######################################
  1200.    
  1201.     #############-skipts-##################
  1202.     if skipts is not None:
  1203.         df = df.loc[:,skipts:]
  1204.         df.columns = df.columns-skipts
  1205.     ######################################
  1206.    
  1207.     #############-ignor-##################
  1208.     if ignor is not None:  # ignor = (main_species,'close')
  1209.         if isinstance(ignor, tuple):
  1210.             main_species, kind = ignor
  1211.             main = compute_molecular_weight(main_species)
  1212.            
  1213.             H = compute_molecular_weight('H')
  1214.             O = compute_molecular_weight('O')
  1215.             criteria = []
  1216.            
  1217.             for a in [0,1,2,3]:
  1218.                 for b in [0,1,2,3]:
  1219.                     if a==0 and b==0:
  1220.                         continue
  1221.                     cri = a*H+b*O
  1222.                     criteria.append(cri)
  1223.            
  1224.             for chem in df.index:
  1225.                 current = compute_molecular_weight(chem)            
  1226.                 for cri in criteria:
  1227.                     if abs(abs(main-current)-cri)<=0.01:
  1228.                         print(chem,'ignored')
  1229.                         df = df.drop(chem)
  1230.                         continue
  1231.         elif isinstance(ignor, str):
  1232.             df = df.drop([ignor],axis=0)
  1233.     #####################################
  1234.    
  1235.    
  1236.     df = df.head(nspecies)
  1237.     ##############-topspec-###############
  1238.     if topspec is not None:
  1239.         desired_order = []
  1240.         for spec in topspec:
  1241.             if spec in df.index:
  1242.                 desired_order.append(spec)
  1243.         for spec in df.index:
  1244.             if spec not in desired_order:
  1245.                 desired_order.append(spec)
  1246.         df = df.reindex(index=desired_order)
  1247.     ######################################
  1248.        
  1249.     print('Species ORDER:',list(df.index))
  1250.     df.index = make_molecular_formula_latex(df.index,sort=True)
  1251.     _, ax1 = plt.subplots(figsize=figsize)
  1252.     if log:
  1253.         ax = sns.heatmap(df,cmap='jet',ax=ax1 ,
  1254.                      cbar_kws={'label': 'Number of species'},
  1255.                      norm=LogNorm(),xticklabels=1000)
  1256.     else:
  1257.         ax = sns.heatmap(df,cmap='jet',ax=ax1 ,
  1258.                      cbar_kws={'label': 'Number of species'},
  1259.                      xticklabels=1000)
  1260.     ax.set_xlabel('Time (ps)',fontsize=fontsize+1)
  1261.     # ax.set_ylabel('Species')
  1262.     plt.tick_params(left=False,bottom=False)
  1263.     plt.yticks(rotation=0,fontsize=fontsize)
  1264.     fig = ax.get_figure()
  1265.     title += '-{}\nTop {} species\n'.format(kind,nspecies)
  1266.     ax.set_title(title)
  1267.     ax.set_xticklabels([0,250,500,750,1000],fontsize=fontsize)
  1268.    
  1269.    
  1270.     # cbar
  1271.     ax.figure.axes[-1].yaxis.label.set_size(fontsize+1) # cbar label size
  1272.     cbar = ax.collections[0].colorbar
  1273.     cbar.ax.tick_params(labelsize=fontsize)
  1274.    
  1275.     saveplot = 'species_heatmap_'+str(random.randint(100000, 999999))
  1276.     if savedir: savedir += '\\'+saveplot
  1277.     else: savedir += saveplot
  1278.     fig.savefig(savedir, dpi=400, bbox_inches='tight')
  1279.    
  1280. # ----------------------Heatmap of Species from multiple neighbours--------------------------------
  1281. @function_runtime
  1282. def plot_species_heatmap_v2(neighbours,atomtypes,atomsymbols,**kwargs):
  1283.     # keyward agruments:
  1284.         # nspecies (int)   = number of species to be plot (default value = 20)
  1285.         # ts (int)         = timestep (default value = 0.25)
  1286.         # savedir (string) = directory to save (default value = '')
  1287.         # titile (string)  = title to be shown on the top (default value = '')
  1288.         # order (list)     = order of the species. Takes list of species (default = mean abundance)
  1289.         # pikle (string)   = load from pickle. It takes pickle_path (default = None)
  1290.         # skipts (float)   = skip some data from the first (default = None)
  1291.         # topspec (list)   = species that appears on the top (default = None)
  1292.        
  1293.     nspecies    = kwargs.get('nspecies', 20)
  1294.     ts          = kwargs.get('ts', 0.25)
  1295.     savedir     = kwargs.get('savedir', '')
  1296.     title       = kwargs.get('title', '')
  1297.     order       = kwargs.get('order',None)
  1298.     pickle      = kwargs.get('pickle',None)
  1299.     skipts        = kwargs.get('skipts',None) # skip timestep
  1300.     topspec     = kwargs.get('topspec',None)
  1301.     ignor       = kwargs.get('ignor',None)
  1302.     kind        = kwargs.get('kind','sum') # kind = 'sum' or 'mean'
  1303.     log         = kwargs.get('log',True)
  1304.     exclude     = kwargs.get('exclude',None)
  1305.    
  1306.     if pickle:
  1307.         data = loadpickle_or_execute(pickle, stepwise_species_count, neighbours, atomtypes, atomsymbols,step2ps=ts)
  1308.     else:
  1309.         data = stepwise_species_count(neighbours, atomtypes, atomsymbols,
  1310.                                       step2ps=ts)
  1311.     ###########-fill with###############
  1312.     if log:
  1313.         df = pd.DataFrame(data).fillna(1)
  1314.     else:
  1315.         df = pd.DataFrame(data).fillna(0)
  1316.     ####################################
  1317.    
  1318.     if order:
  1319.         df = df.loc[order,:]
  1320.     else:
  1321.         df.loc[:,'sum'] = df.sum(axis=1)
  1322.         df = df.sort_values(by='sum',ascending=False)
  1323.         df = df.drop(['sum'],axis=1)
  1324.    
  1325.     #############-exclude-##################
  1326.     if exclude is not None:
  1327.         df = df.drop(index=exclude)
  1328.     ######################################
  1329.    
  1330.     #############-skipts-##################
  1331.     if skipts is not None:
  1332.         df = df.loc[:,skipts:]
  1333.         df.columns = df.columns-skipts
  1334.     ######################################
  1335.    
  1336.     #############-ignor-##################
  1337.     if ignor is not None:  # ignor = (main_species,'close')
  1338.         if isinstance(ignor, tuple):
  1339.             main_species, kindd = ignor
  1340.             main = compute_molecular_weight(main_species)
  1341.            
  1342.             H = compute_molecular_weight('H')
  1343.             O = compute_molecular_weight('O')
  1344.             criteria = []
  1345.            
  1346.             for a in [0,1,2,3]:
  1347.                 for b in [0,1,2,3]:
  1348.                     if a==0 and b==0:
  1349.                         continue
  1350.                     cri = a*H+b*O
  1351.                     criteria.append(cri)
  1352.            
  1353.             for chem in df.index:
  1354.                 current = compute_molecular_weight(chem)            
  1355.                 for cri in criteria:
  1356.                     if abs(abs(main-current)-cri)<=0.01:
  1357.                         print(chem,'ignored')
  1358.                         df = df.drop(chem)
  1359.                         continue
  1360.         elif isinstance(ignor, str):
  1361.             df = df.drop([ignor],axis=0)
  1362.     #####################################
  1363.    
  1364.    
  1365.     df = df.head(nspecies)    
  1366.     ##############-topspec-###############
  1367.     if topspec is not None:
  1368.         desired_order = []
  1369.         for spec in topspec:
  1370.             if spec in df.index:
  1371.                 desired_order.append(spec)
  1372.         for spec in df.index:
  1373.             if spec not in desired_order:
  1374.                 desired_order.append(spec)
  1375.         df = df.reindex(index=desired_order)
  1376.     ######################################
  1377.        
  1378.     print('Species ORDER:',list(df.index))
  1379.     df.index = make_molecular_formula_latex(df.index,sort=True)
  1380.     #df.loc['$O_{2}$',:] = df.loc['$O_{2}$',:] + 200
  1381.     _, ax1 = plt.subplots(figsize=(15,8))
  1382.     if log:
  1383.         ax = sns.heatmap(df,cmap='jet',ax=ax1 ,
  1384.                          cbar_kws={'label': 'Number of species'},
  1385.                          norm=LogNorm(),xticklabels=1000)
  1386.     else:
  1387.         ax = sns.heatmap(df,cmap='jet',ax=ax1 ,
  1388.                          cbar_kws={'label': 'Number of species'},
  1389.                          xticklabels=1000)
  1390.        
  1391.     ax.set_xlabel('Time (ps)')
  1392.     ax.set_ylabel('Species')
  1393.     plt.tick_params(left=False,bottom=False)
  1394.     plt.yticks(rotation=0)
  1395.     fig = ax.get_figure()
  1396.     title += '---{}\nTop {} species'.format(kind,nspecies)
  1397.     ax.set_title(title)
  1398.     plt.show()
  1399.    
  1400.     saveplot = 'species_heatmap_'+str(random.randint(100000, 999999))
  1401.     if savedir: savedir += '\\'+saveplot
  1402.     else: savedir += saveplot
  1403.     fig.savefig(savedir, dpi=400, bbox_inches='tight')
  1404. # ------Get Atomic Mass --------------------------------
  1405. def atomic_weight(*args,**kwargs):
  1406.     # args   = element string
  1407.     # kwargs = {key:'all'}
  1408.     key = kwargs.get('key',None)
  1409.     MM_of_Elements = {'H': 1.00794, 'He': 4.002602, 'Li': 6.941, 'Be': 9.012182,
  1410.                       'B': 10.811, 'C': 12.0107, 'N': 14.0067, 'O': 15.9994,'F': 18.9984032,
  1411.                       'Ne': 20.1797, 'Na': 22.98976928, 'Mg': 24.305, 'Al': 26.9815386,
  1412.                       'Si': 28.0855, 'P': 30.973762, 'S': 32.065, 'Cl': 35.453, 'Ar': 39.948,
  1413.                       'K': 39.0983, 'Ca': 40.078, 'Sc': 44.955912, 'Ti': 47.867, 'V': 50.9415,
  1414.                       'Cr': 51.9961, 'Mn': 54.938045, 'Fe': 55.845, 'Co': 58.933195, 'Ni': 58.6934,
  1415.                       'Cu': 63.546, 'Zn': 65.409, 'Ga': 69.723, 'Ge': 72.64, 'As': 74.9216,
  1416.                       'Se': 78.96, 'Br': 79.904, 'Kr': 83.798, 'Rb': 85.4678, 'Sr': 87.62,
  1417.                       'Y': 88.90585, 'Zr': 91.224, 'Nb': 92.90638, 'Mo': 95.94, 'Tc': 98.9063,
  1418.                       'Ru': 101.07, 'Rh': 102.9055, 'Pd': 106.42, 'Ag': 107.8682, 'Cd': 112.411,
  1419.                       'In': 114.818, 'Sn': 118.71, 'Sb': 121.760, 'Te': 127.6, 'I': 126.90447,
  1420.                       'Xe': 131.293, 'Cs': 132.9054519, 'Ba': 137.327, 'La': 138.90547,
  1421.                       'Ce': 140.116, 'Pr': 140.90465, 'Nd': 144.242, 'Pm': 146.9151, 'Sm': 150.36,
  1422.                       'Eu': 151.964, 'Gd': 157.25, 'Tb': 158.92535, 'Dy': 162.5,
  1423.                       'Ho': 164.93032, 'Er': 167.259, 'Tm': 168.93421,
  1424.                       'Yb': 173.04, 'Lu': 174.967, 'Hf': 178.49,
  1425.                       'Ta': 180.9479, 'W': 183.84, 'Re': 186.207,
  1426.                       'Os': 190.23, 'Ir': 192.217, 'Pt': 195.084,
  1427.                       'Au': 196.966569, 'Hg': 200.59, 'Tl': 204.3833,
  1428.                       'Pb': 207.2, 'Bi': 208.9804, 'Po': 208.9824,
  1429.                       'At': 209.9871, 'Rn': 222.0176, 'Fr': 223.0197,
  1430.                       'Ra': 226.0254, 'Ac': 227.0278, 'Th': 232.03806,
  1431.                       'Pa': 231.03588, 'U': 238.02891, 'Np': 237.0482,
  1432.                       'Pu': 244.0642, 'Am': 243.0614, 'Cm': 247.0703,
  1433.                       'Bk': 247.0703, 'Cf': 251.0796, 'Es': 252.0829,
  1434.                       'Fm': 257.0951, 'Md': 258.0951, 'No': 259.1009,
  1435.                       'Lr': 262, 'Rf': 267, 'Db': 268, 'Sg': 271,
  1436.                       'Bh': 270, 'Hs': 269, 'Mt': 278, 'Ds': 281,
  1437.                       'Rg': 281, 'Cn': 285, 'Nh': 284, 'Fl': 289,
  1438.                       'Mc': 289, 'Lv': 292, 'Ts': 294, 'Og': 294,'': 0}
  1439.  
  1440.     if len(args)==1:
  1441.         element = args[0]
  1442.         return MM_of_Elements[element]
  1443.     elif key=='all':
  1444.         return MM_of_Elements
  1445.     else:
  1446.         sys.exit('User Error from atomic_weight: Unknown kwargs or number of args')
  1447. # ------ONSET Finding----------------------
  1448. @function_runtime
  1449. def get_speciesVStemp(species,neighbour,atomtypes,atomsymbols,**kwargs):
  1450.     ########## Soft Warnings ################
  1451.     if 'ramp' not in kwargs.keys():
  1452.         print('User Warning from get_speciesVStemp: Ramping rate not specified by the user. The default value of 1 K/ps has been set')
  1453.     if 'ts' not in kwargs.keys():
  1454.         print('User Warning from get_speciesVStemp: Timestep not specified by the user. The default value of 0.25 fs has been set')
  1455.     if 'it' not in kwargs.keys():
  1456.         print('User Warning from get_speciesVStemp: Initial temperature not specified by the user. The default value of 0 K has been set')
  1457.        
  1458.     ######### Getting kwargs #################    
  1459.     ramp         = kwargs.get('ramp',1) # temp ramp rate (Default: 1 K/ps)
  1460.     it           = kwargs.get('it',0) # initial temp (Default: 0 K)
  1461.     ts           = kwargs.get('ts',0.25)
  1462.     method       = kwargs.get('method','mf')
  1463.                    # calculate onset based on method
  1464.                    # mf = molecular formula
  1465.                    # mw = molecular weight
  1466.     lim        = kwargs.get('lim',0)
  1467.                    # if method='mw', it will count species if the mw
  1468.                    # within the 'lim'
  1469.    
  1470.     if method=='mf':
  1471.         count = {}
  1472.         for step,neigh in neighbour.items():
  1473.             count[step]=0
  1474.             molecules = get_molecules(neigh)
  1475.             for molecule in molecules:
  1476.                 mf = get_molecular_formula(molecule, atomtypes, atomsymbols)
  1477.                 if mf==species:
  1478.                     #print(mf)
  1479.                     count[step]+=1
  1480.             #print(step,count[step])
  1481.  
  1482.     elif method=='mw':
  1483.         count = {}
  1484.         mw_species = compute_molecular_weight(species)
  1485.         for step,neigh in neighbour.items():
  1486.             count[step]=0
  1487.             molecules = get_molecules(neigh)
  1488.             for molecule in molecules:
  1489.                 mf = get_molecular_formula(molecule, atomtypes, atomsymbols)
  1490.                 mw = compute_molecular_weight(mf)
  1491.                 if abs(mw-mw_species)<=lim:
  1492.                     count[step]+=1
  1493.                     #print(mw,mw_species,end='\t')
  1494.     else:
  1495.         sys.exit('Method not found!!!')
  1496.    
  1497.     steps          = list(count.keys())
  1498.     sc             = np.array(list(count.values())) # species count
  1499.     ps             = np.array(step2picosecond(steps, ts))
  1500.     temp           = it + ramp*ps
  1501.     return temp,sc
  1502.  
  1503. def get_onset(temp,sc,**kwargs):
  1504.     imc          = kwargs.get('imc',50) # initial molecular count
  1505.     ig           = kwargs.get('ig',[2200,107,0,50]) # initial guess for curve fit
  1506.     show_fit     = kwargs.get('show_fit',False)
  1507.    
  1508.     #-----Fit Function----------------------
  1509.     def fit(x, A, B, C, D):  
  1510.         y = C+((D-C)/(1+np.exp((x-A)/B)))
  1511.         return y
  1512.  
  1513.     def inv_fit(y,A,B,C,D):
  1514.         if D-y<0:
  1515.             print('Warning from get_onset: Negative D value adjusted!!')
  1516.             D = 49.98
  1517.         x = A + B*np.log((D-y)/(y-C))
  1518.         return x
  1519.     #-----------------------------------------
  1520.    
  1521.     p, cov = curve_fit(fit, temp, sc,ig,maxfev=2000)
  1522.     if show_fit :
  1523.         print('Parameters: ',p)
  1524.         print('Covariance',cov)
  1525.     sc_fit         = fit(temp,*p)
  1526.     onset          = inv_fit(imc-1, *p) # onset temp at which 1 molecule
  1527.    
  1528.     # R^2 Value
  1529.     sc_mean = np.mean(sc)
  1530.     tss = np.sum((sc - sc_mean)**2)
  1531.     rss = np.sum((sc - sc_fit)**2)
  1532.     r2 = 1 - (rss / tss)
  1533.     print('R square value: ', r2)
  1534.    
  1535.     return onset, sc_fit
  1536.  
  1537. def bondorder_evolution(bondorders,bondlist,**kwargs):
  1538.     if 'ts' not in kwargs.keys():
  1539.         print('User Warning from get_speciesVStemp: Timestep not specified by the user. The default value of 0.25 fs has been set')
  1540.     ## Getting kwargs ##
  1541.     ts           = kwargs.get('ts',0.25)
  1542.     title        = kwargs.get('title',"")
  1543.     savedir      = kwargs.get('savedir', '')
  1544.     skipts       = kwargs.get('skipts',None) # skip timestep
  1545.     sort         = kwargs.get('sort',None) #ascending,descending or index(ts)
  1546.     figsize      = kwargs.get('figsize',(6,10))
  1547.     fontsize     = kwargs.get('fontsize',10)
  1548.     plot         = kwargs.get('plot','yes')
  1549.     ps2temp      = kwargs.get('ps2temp',None) # (it,ramp)
  1550.                         # it   = initial temp
  1551.                         # ramp = ramping rate
  1552.    
  1553.     steps = list(bondorders.keys())
  1554.     bo_evolution = {}
  1555.     ps = np.array(step2picosecond(steps, ts))
  1556.     xlabel = ''#'Time (ps)'
  1557.    
  1558.     if ps2temp:
  1559.         xlabel = 'Temrature (K)'
  1560.         it,ramp = ps2temp
  1561.         ps = it + ps*ramp
  1562.  
  1563.     for index, atompair in enumerate(bondlist):
  1564.         atom1,atom2 = atompair
  1565.         bo_evolution[index+1]={}
  1566.         for step,p in zip(steps,ps):
  1567.             bo = bondorders[step][atom1].get(atom2,0)
  1568.             bo_evolution[index+1][p]=bo
  1569.            
  1570.     #############-sort-##################      
  1571.     df = pd.DataFrame(bo_evolution)
  1572.     df = df.transpose()    
  1573.     if sort is not None:
  1574.         if sort == 'ascending':
  1575.             df.loc[:,'sum'] = df.sum(axis=1)
  1576.             df = df.sort_values(by='sum')
  1577.             df = df.drop(['sum'],axis=1)      
  1578.         elif sort == 'descending':
  1579.             df.loc[:,'sum'] = df.sum(axis=1)
  1580.             df = df.sort_values(by='sum',ascending=False)
  1581.             df = df.drop(['sum'],axis=1)
  1582.         elif type(sort) in [float,int]:
  1583.             df = df.sort_values(by=sort)
  1584.         else:
  1585.             print('User Error from bondorder_evolution: sort key not found')
  1586.             sys.exit()
  1587.     #####################################
  1588.     df.index = range(1,len(bondlist)+1)
  1589.     #############-skipts-##################
  1590.     if skipts is not None:
  1591.         df = df.loc[:,skipts:]
  1592.         df.columns = df.columns-skipts
  1593.     ######################################
  1594.    
  1595.     if plot=='yes':
  1596.         _, ax1 = plt.subplots(figsize=figsize)
  1597.         hm = sns.heatmap(df,cmap='jet',cbar_kws={ 'label': 'Bond order',},xticklabels=1000,ax=ax1,vmin=0.0,vmax=3.0)
  1598.         hm.set_xlabel(xlabel,fontsize=fontsize+2)
  1599.         # hm.set_ylabel('Bonds ',fontsize=fontsize)
  1600.         plt.yticks(rotation=0)
  1601.        
  1602.         #############-Y Tick Labels-###########
  1603.         hm.set_yticklabels([])
  1604.        
  1605.         ## set xticks location and labels ####
  1606.         # hm.set_xticks([795*x for x in range(5)])
  1607.         print([int(x) for x in hm.get_xticks()])
  1608.         hm.set_xticklabels([0,250,500,750,1000])
  1609.         plt.xticks(rotation=90,fontsize=fontsize+2)
  1610.         ###########
  1611.        
  1612.         hm.set_title(title,fontsize=fontsize-5)
  1613.         # Set the font size of the y-label
  1614.         hm.set_yticklabels(hm.get_yticklabels(), fontsize=fontsize)
  1615.    
  1616.    
  1617.         ######## colorbar settings #########
  1618.         cbar = hm.collections[0].colorbar
  1619.         cbar.ax.tick_params(labelsize=fontsize+2)
  1620.         hm.figure.axes[-1].yaxis.label.set_size(15+4)
  1621.         ####################################
  1622.        
  1623.         if savedir:
  1624.             fig = hm.get_figure()
  1625.             savedir = savedir+'\\bo_evolution_'+str(random.randint(100000, 999999))
  1626.             fig.savefig(savedir, dpi=400, bbox_inches='tight')
  1627.    
  1628.     return df
  1629.  
  1630. @function_runtime
  1631. def get_nbondsVStime(bondorders, atomtypes, bondlist,**kwargs):
  1632.     # plot the number of bonds vs time
  1633.     ## Getting kwargs and setting up default values ##
  1634.     cutoff = kwargs.get('cutoff',1)
  1635.     tol    = kwargs.get('tol',0.20)
  1636.     ts     = kwargs.get('ts',0.25) # timestep
  1637.     plot   = kwargs.get('plot',None) # values: 'yes'
  1638.     skipts = kwargs.get('skipts',None) # skip timestep
  1639.     cumu   = kwargs.get('cumu',False)
  1640.    
  1641.     ## Getting args
  1642.    
  1643.     nbonds = []
  1644.     cumu_nbonds = []
  1645.     cumu_count = 0
  1646.     steps  = []
  1647.     for step,bo in bondorders.items():
  1648.         steps.append(step)
  1649.         bondcount = 0
  1650.         for bond in bondlist:
  1651.             atom1, atom2 = bond
  1652.             try:
  1653.                 bovalue = bo[atom1][atom2]
  1654.             except:
  1655.                 bovalue = 0
  1656.                
  1657.             # if bovalue!=0: print(step,bovalue,end=' | ')
  1658.             if abs(bovalue-cutoff)<=tol:
  1659.                 bondcount+=1
  1660.                 cumu_count+=1
  1661.         nbonds.append(bondcount)
  1662.         cumu_nbonds.append(cumu_count)
  1663.        
  1664.     ps = step2picosecond(steps, ts)
  1665.    
  1666.     if skipts is not None:
  1667.         startid = ps.index(skipts)
  1668.         ps      = np.array(ps[startid+1:])
  1669.         nbonds  = np.array(nbonds[startid+1:])
  1670.         if cumu:
  1671.             cumu_nbonds  = np.array(cumu_nbonds[startid+1:])
  1672.         ps = ps - skipts
  1673.        
  1674.     # ploting
  1675.     if plot == 'yes':
  1676.         plt.plot(ps,nbonds,color='black')
  1677.         plt.xlabel('Time (ps)')
  1678.         plt.ylabel('Number of bonds')
  1679.    
  1680.     if cumu:
  1681.         return ps, cumu_nbonds
  1682.     else:
  1683.         return ps, nbonds
  1684.        
  1685.    
  1686.  
  1687. @function_runtime
  1688. def species_to_molecule(bonddata,atomsymbols, species,**kwargs): # species-->molecule
  1689.     # speceis is chemcical formula of a molecule
  1690.     # molecule here is the list of atom ids that refer the molecule
  1691.    
  1692.     ###########-get kwargs-###############
  1693.     source    = kwargs.get('source',False)
  1694.     dump      = kwargs.get('dump',None)
  1695.     shortinfo = kwargs.get('shortinfo',False)
  1696.     ######################################
  1697.     def atomids2expression(atoms): # atom ids to ovito expression selection
  1698.         atoms = list(atoms)
  1699.         expression = ''
  1700.         for atom in atoms:
  1701.             if atoms.index(atom)==len(atoms)-1:
  1702.                 expression+='ParticleIdentifier=='+str(atom)
  1703.             else:
  1704.                 expression+='ParticleIdentifier=='+str(atom)+'|| '
  1705.        
  1706.         return expression
  1707.    
  1708.    
  1709.    
  1710.     neighbours = bonddata['neighbours']
  1711.     # bondorders = bonddata['bondorders']
  1712.     atypes  = bonddata['atypes']
  1713.     mtypes     = bonddata['mtypes']
  1714.     molecules  = bonddata['molecules']
  1715.    
  1716.     seeklist    = []
  1717.     seekstep    = []
  1718.     if dump is not None:
  1719.         f = open(dump,'w')
  1720.     if shortinfo:
  1721.         sf = open(dump[:dump.rfind('.')]+'_shortinfo.txt','w')
  1722.     for step,neigh in neighbours.items():
  1723.         PRINT = []
  1724.         graph = nx.Graph(neigh)
  1725.         connected = nx.connected_components(graph)
  1726.         for component in connected:
  1727.             componentformula = get_molecular_formula(component, atypes, atomsymbols)
  1728.             if componentformula == species and component not in seeklist:                
  1729.                 seeklist.append(component)
  1730.                 seekstep.append(step)
  1731.                 # print(step)
  1732.                 # print(componentformula,component)
  1733.                 # print(atomids2expression(component))
  1734.                 if dump is not None:
  1735.                     print(step,file=f)
  1736.                     print(componentformula,component,file=f)
  1737.                     print(atomids2expression(component),file=f)
  1738.                 if shortinfo:
  1739.                     print(step,file=sf)
  1740.                     print(componentformula+' =',end=' ',file=sf)
  1741.                 if source:
  1742.                     src = {mtypes[x] for x in component}
  1743.                     for j,s in enumerate(src):
  1744.                         # print('Source {}:'.format(s))
  1745.                         # print(atomids2expression(molecules[s]))
  1746.                         # print('-------------------')
  1747.                         if dump is not None:
  1748.                             print('Source {}:'.format(s),file=f)
  1749.                             print(atomids2expression(molecules[s]),file=f)
  1750.                             print('-------------------',file=f)
  1751.                         if shortinfo:
  1752.                             PRINT.append('M-{}'.format(s))
  1753.                 # print('-'*200)
  1754.                 if dump is not None: print('-'*200,file=f)
  1755.                 if shortinfo:
  1756.                     print(*PRINT,sep=' + ',file=sf)
  1757.                     print(*PRINT,sep=' + ')
  1758.     f.close()
  1759.     return seeklist,seekstep
  1760.  
  1761. @function_runtime
  1762. def cumulative_nspecies(neighbours,atypes,atomsymbols,specieslist,**kwargs):
  1763.     ###########-get kwargs-###############
  1764.     skipts    = kwargs.get('skipts',None)
  1765.     ts        = kwargs.get('ts',0.25)
  1766.     ######################################
  1767.    
  1768.     count = [0]*len(specieslist)
  1769.     steps = list(neighbours.keys())
  1770.     ccount = [[] for i in range(len(specieslist))]
  1771.     checklist = [[] for i in range(len(specieslist))]
  1772.     for step,neigh in neighbours.items():
  1773.         molecules = get_molecules(neigh)
  1774.         for molecule in molecules:
  1775.             formula = get_molecular_formula(molecule, atypes, atomsymbols)
  1776.             for i,species in enumerate(specieslist):
  1777.                 if species==formula and molecule not in checklist[i]:
  1778.                     checklist[i].append(molecule)
  1779.                     count[i]+=1
  1780.         for i in range(len(ccount)):
  1781.             ccount[i].append(count[i])
  1782.        
  1783.     ps     = step2picosecond(steps, ts)
  1784.    
  1785.     if skipts is not None:
  1786.         index  = ps.index(skipts)
  1787.         ps     = ps[index:]
  1788.         for i in range(len(ccount)):
  1789.             ccount[i] = ccount[i][index:]
  1790.    
  1791.     ps = np.array(ps)-min(ps)
  1792.     print(len(ccount))
  1793.     ccount = [np.array(x) for x in ccount]  
  1794.    
  1795.     return ps,ccount
  1796.  
  1797. def onset_plot(path,whole,atomsymbols,timestep,temp_ramp,initial_temp,**kwargs):
  1798.    
  1799.     ## getting kwargs
  1800.     color    = kwargs.get('color',None)  # assign random color
  1801.     sim_path = kwargs.get('sim_path',['Sim-1','Sim-2','Sim-3'])
  1802.     ax       = kwargs.get('ax',None)
  1803.    
  1804.    
  1805.     df = pd.DataFrame()
  1806.     ## Iterate over number of simulations
  1807.     for sim in sim_path:
  1808.         bondfilepath = path+'\\'+sim+'\\bonds.reaxc'
  1809.    
  1810.         ## Geting neighbours for each simulations
  1811.         bonddata   = parsebondfile(bondfilepath)
  1812.         neighbours = bonddata['neighbours']
  1813.         atypes     = bonddata['atypes']
  1814.        
  1815.         ## Getting temperatures using steps, timesteps, temp_ramp, initial_temp
  1816.         steps      = np.array(list(neighbours.keys()))
  1817.         time       = steps*timestep/1000  # in piccosecond
  1818.         temp       = initial_temp + time*temp_ramp
  1819.         df['temp'] = temp
  1820.        
  1821.         ##  Getting number of whole
  1822.         nwhole     = []
  1823.         for step, neigh in neighbours.items():
  1824.             count = 0
  1825.             molecules = get_molecules(neigh)
  1826.             for molecule in molecules:
  1827.                 species = get_molecular_formula(molecule, atypes, atomsymbols)
  1828.                 if species == whole:
  1829.                     count+=1
  1830.             nwhole.append(count)
  1831.         df[sim]=nwhole
  1832.    
  1833.     ## Getting the Upper and Lower bound
  1834.     nsim = len(sim_path)
  1835.     upper_bound = df.iloc[:, -nsim:].max(axis=1)
  1836.     lower_bound = df.iloc[:, -nsim:].min(axis=1)
  1837.    
  1838.     ## plotting the fill-between plot
  1839.     x       = df['temp']
  1840.     if ax is None: fig, ax = plt.subplots()
  1841.     ax.fill_between(x, lower_bound, upper_bound, color=color,alpha=0.2)
  1842.    
  1843.     ## default fit function
  1844.     def function(x, A, B, C, D):  
  1845.         y = C+((D-C)/(1+np.exp((x-A)/B)))
  1846.         return y
  1847.     ## creating average sim data out of df
  1848.     y         = df.iloc[:,-nsim:].mean(axis=1)
  1849.    
  1850.     ## curve fit
  1851.     popt, cov = curve_fit(function, x, y,p0=[2200,107,0,25])
  1852.     y_fit     = function(x, *popt)
  1853.    
  1854.     ## plot fit values
  1855.     ax.plot(x,y_fit,color=color)
  1856.    
  1857.     ## getting onset using newton-rahpson or secant
  1858.     y_target  = df.iloc[:100,-1].mean()
  1859.     print(y_target)
  1860.     onset     = newton(lambda x: function(x,*popt)-y_target, x0=300)
  1861.     print('Onset: ', onset)
  1862.    
  1863.     ## plotting a onset indicating verticle dashed line
  1864.     ax.plot([onset]*2,[0,y_target],'--',color=color)
  1865.    
  1866.     return fig,ax
  1867.  
  1868. @function_runtime
  1869. def get_species_count(bondfilepath,atomsymbols,cutoff=0.3):
  1870.     # output a panda dataframe of specie timeseries
  1871.    
  1872.     bonddata   = parsebondfile(bondfilepath,cutoff=cutoff)
  1873.     neighbours = bonddata['neighbours']
  1874.     atypes     = bonddata['atypes']
  1875.    
  1876.     count = {}
  1877.     for step, neigh in neighbours.items():
  1878.         molecules = get_molecules(neigh)
  1879.         count[step] = {}
  1880.         for molecule in molecules:
  1881.             species = get_molecular_formula(molecule, atypes, atomsymbols)
  1882.             if species in count[step]:
  1883.                 count[step][species]+=1
  1884.             else:
  1885.                 count[step][species]=1
  1886.        
  1887.     df = pd.DataFrame(count).fillna(0)
  1888.     df.index.name = 'Timestep'
  1889.     return df
  1890.  
  1891. # doubt
  1892. @function_runtime
  1893. def count_functinalGroup(neighbours,atypes,seek):
  1894.     group = {'OH': [3,[1,2]],
  1895.              'COOH': [2,[1,3,3]],
  1896.              'Keto': [2,[2,2,3]],
  1897.              'Aldy': [2,[1,2,3]]}
  1898.    
  1899.     count = [0]*len(seek)
  1900.     for i, ss in enumerate(seek):
  1901.         if ss not in group:
  1902.             target_parent_type    = group[ss][0]
  1903.             target_children_types = group[ss][1]
  1904.         else:
  1905.             print('hi')
  1906.             raise('{} functional group not found!'.format(ss))
  1907.        
  1908.         checklist = []
  1909.         for step, neigh in neighbours.items():
  1910.             for parent, children in neigh.items():
  1911.                 children_types = sorted([atypes[x] for x in children])
  1912.                 match = atypes[parent]==target_parent_type and \
  1913.                             children_types == sorted(target_children_types)    
  1914.                 if match and parent not in checklist:
  1915.                     count[i]+=1
  1916.                     checklist.append(parent)
  1917.     return count
  1918.  
  1919. def atomConnectivity2smiles_OLD(atomConnectivity,atypes,atomic_num):
  1920.     # atomConnectivity is a networkx Graph (to be speciec subgraph, same
  1921.     # shit though).  
  1922.     # This converts connected atom graphs, which are obtained from the atom
  1923.     # neighbors in a single frame to SMILES. Essentially, the connectivity
  1924.     # represents a subgraph of each molecule, extracted from its neighbors.
  1925.    
  1926.     mol = Chem.RWMol()
  1927.    
  1928.     # Tips:
  1929.     # If the atom IDs in your atomConnectivity graph are not sequential
  1930.     # starting from 0 or if they are random, you'll need to create a mapping
  1931.     # between these IDs and the indices of atoms in the RDKit molecule.
  1932.     # This is because RDKit expects atom indices to start from 0 and
  1933.     # increment sequentially.
  1934.     # No worries! I have done this already. atomid2index is the mapping
  1935.    
  1936.    
  1937.     ## adding atoms
  1938.     atomid2index = {}
  1939.     for node in atomConnectivity.nodes():
  1940.         atomicNumber = atomic_num[atypes[node]-1]
  1941.         if atomicNumber==1:
  1942.             continue
  1943.         atom_index = mol.AddAtom(Chem.Atom(atomicNumber))
  1944.         atomid2index[node] = atom_index
  1945.        
  1946.     ## adding bonds
  1947.     for u, v, bond_type in atomConnectivity.edges(data='bond_type'):
  1948.         uan = atomic_num[atypes[u]-1]
  1949.         van = atomic_num[atypes[v]-1]
  1950.         if uan==1 or van==1:
  1951.             continue
  1952.        
  1953.         if bond_type==1:
  1954.             rdkitBondType = Chem.BondType.SINGLE
  1955.         elif bond_type==2:
  1956.             rdkitBondType = Chem.BondType.DOUBLE
  1957.         elif bond_type==3:
  1958.             rdkitBondType = Chem.BondType.TRIPLE
  1959.         else:
  1960.             print('User ERROR: bond type is not recognized')
  1961.             print('Setting the bond type as single')
  1962.             rdkitBondType = Chem.BondType.SINGLE
  1963.            
  1964.         mol.AddBond(atomid2index[u], atomid2index[v], rdkitBondType)
  1965.    
  1966.     mol = mol.GetMol()
  1967.     smiles = Chem.MolToSmiles(mol)
  1968.     return smiles
  1969.  
  1970. def moleculeGraph2smiles(moleculeGraph,atomic_num,n_clusters,plot_cluster=False,bo_analysis=True,atom_types=None):
  1971.     # moleculeGraph is a networkx Graph (to be speciec subgraph, same
  1972.     # shit though) of a single molecule having node attr as atom_type and
  1973.     # edge attr as bond_order. This converts molecule graphs to SMILES.
  1974.     import warnings
  1975.     warnings.filterwarnings("ignore")
  1976.    
  1977.     mol = Chem.RWMol()
  1978.    
  1979.     # Tips:
  1980.     # If the atom IDs in your moleculeGraph graph are not sequential
  1981.     # starting from 0 or if they are random, you'll need to create a mapping
  1982.     # between these IDs and the indices of atoms in the RDKit molecule.
  1983.     # This is because RDKit expects atom indices to start from 0 and
  1984.     # increment sequentially.
  1985.     # No worries! I have done this already. atomid2index is the mapping
  1986.    
  1987.     #######################################################################
  1988.     # auto-defining the bond type using unsupervised clustering algorithm
  1989.     #######################################################################
  1990.     if bo_analysis:
  1991.         bo_list = []
  1992.         for u,v,bond_order in moleculeGraph.edges(data='bond_order'):
  1993.             bo_list.append(bond_order)
  1994.         bo_array = np.array(bo_list).reshape(-1, 1)
  1995.         # Apply KMeans clustering
  1996.    
  1997.         kmeans = KMeans(n_clusters=n_clusters,n_init=10,random_state=0)
  1998.         kmeans.fit(bo_array)
  1999.         labels = kmeans.labels_
  2000.         centroids = kmeans.cluster_centers_
  2001.        
  2002.         ref_bond_orders = [1.0,1.5,2.0]
  2003.        
  2004.        
  2005.         # Assign labels to clusters
  2006.         cluster_labels = {}
  2007.         for i, center in enumerate(centroids):
  2008.             # Find which reference bond order is closest to this cluster center
  2009.             closest_ref_label = np.argmin(np.abs(ref_bond_orders - center)) + 1
  2010.             cluster_labels[i] = closest_ref_label
  2011.        
  2012.        
  2013.         # Labeling each bond order
  2014.         new_labels = [cluster_labels[label] for label in kmeans.labels_]
  2015.         bond_order2type = dict(zip(bo_list,new_labels))
  2016.        
  2017.         # Visualize the clusters
  2018.         # Create scatter plots for each type
  2019.         colors = ['r','b','g','grey','orange','k','cyan']
  2020.         if plot_cluster:
  2021.             for i in range(n_clusters):
  2022.                 plt.scatter(np.arange(bo_array.size)[labels == i],
  2023.                             bo_array[labels==i],s=60,edgecolor='k',
  2024.                             color=colors[i])
  2025.             plt.show()
  2026.         ########################################################################
  2027.         ########################################################################
  2028.        
  2029.         ## adding atoms
  2030.         atomid2index = {}
  2031.         for node, atom_type in moleculeGraph.nodes(data='atom_type'):
  2032.             atomicNumber = atomic_num[atom_type-1]
  2033.             if atomicNumber==1: # we are neglecting Hydrogen in SMILE
  2034.                 continue
  2035.             atom_index = mol.AddAtom(Chem.Atom(atomicNumber))
  2036.             atomid2index[node] = atom_index
  2037.            
  2038.         ## adding bonds
  2039.         for u, v, bond_order in moleculeGraph.edges(data='bond_order'):
  2040.             bond_type = bond_order2type[bond_order]
  2041.             u_atom_type = moleculeGraph.nodes[u]['atom_type']
  2042.             v_atom_type = moleculeGraph.nodes[v]['atom_type']
  2043.             uan = atomic_num[u_atom_type - 1]
  2044.             van = atomic_num[v_atom_type - 1]
  2045.             if uan == 1 or van == 1:
  2046.                 continue
  2047.            
  2048.             if bond_type == 1:
  2049.                 rdkitBondType = Chem.BondType.SINGLE
  2050.             elif bond_type == 2:
  2051.                 rdkitBondType = Chem.BondType.DOUBLE
  2052.             elif bond_type == 3:
  2053.                 rdkitBondType = Chem.BondType.TRIPLE
  2054.             else:
  2055.                 print('User ERROR: bond type is not recognized')
  2056.                 print('Setting the bond type as single')
  2057.                 rdkitBondType = Chem.BondType.SINGLE
  2058.                
  2059.             mol.AddBond(atomid2index[u], atomid2index[v], rdkitBondType)
  2060.    
  2061.     else: # if bo_analysis == False
  2062.         atomid2index = {}
  2063.         for node in moleculeGraph.nodes:
  2064.             atomicNumber = atomic_num[atom_types[node]-1]
  2065.             if atomicNumber==1: # we are neglecting Hydrogen in SMILE
  2066.                 continue
  2067.             atom_index = mol.AddAtom(Chem.Atom(atomicNumber))
  2068.             atomid2index[node] = atom_index
  2069.            
  2070.         ## adding bonds
  2071.         for u, v in moleculeGraph.edges:
  2072.             u_atom_type = atom_types[u]
  2073.             v_atom_type = atom_types[v]
  2074.             uan = atomic_num[u_atom_type - 1]
  2075.             van = atomic_num[v_atom_type - 1]
  2076.             if uan == 1 or van == 1:
  2077.                 continue
  2078.             mol.AddBond(atomid2index[u], atomid2index[v], Chem.BondType.SINGLE)
  2079.    
  2080.     mol = mol.GetMol()
  2081.     smiles = Chem.MolToSmiles(mol)
  2082.     return smiles
  2083.  
  2084.  
  2085. def get_bondtypes(bo,n_clusters,plot=False):  
  2086.     ## getting all bond orders in a np array
  2087.     ## bo is the bond order of of atomconnectivity of each frame
  2088.    
  2089.     bo_list = []
  2090.     for atom1, inner_dict in bo.items():
  2091.         for atom2, bond_order in inner_dict.items():
  2092.             if bond_order not in bo_list:
  2093.                 bo_list.append(bond_order)
  2094.    
  2095.     bo_array = np.array(bo_list).reshape(-1, 1)
  2096.     # Apply KMeans clustering
  2097.  
  2098.     kmeans = KMeans(n_clusters=n_clusters,n_init=10)
  2099.     kmeans.fit(bo_array)
  2100.    
  2101.     # Re-labeling the cluster. min_bo=Label-1, mid_bo=Label-2, max_bo=Label-3
  2102.     # One problem. Only work if single-double-triple, this order maintained
  2103.     # For example: single-triple = won't work; double-triple = won't work
  2104.     # Only single = will work, single-double = will work
  2105.     labels = kmeans.labels_
  2106.     centroids = kmeans.cluster_centers_
  2107.    
  2108.     # Pair up cluster labels with their centroids and sort
  2109.     sorted_clusters = sorted(enumerate(centroids), key=lambda x: x[1])
  2110.    
  2111.     # Create a mapping from old to new labels
  2112.     label_mapping = {old_label: new_label+1 for new_label, (old_label, _) in enumerate(sorted_clusters)}
  2113.    
  2114.     # Apply the new labels to your data
  2115.     new_labels = [label_mapping[label] for label in labels]
  2116.    
  2117.     # Visualize the clusters
  2118.     # Create scatter plots for each type
  2119.     colors = ['r','b','g','grey','orange','k','cyan']
  2120.     if plot:
  2121.         for i in range(n_clusters):
  2122.             plt.scatter(np.arange(bo_array.size)[labels == i],
  2123.                         bo_array[labels==i],s=60,edgecolor='k',
  2124.                         color=colors[i])
  2125.         plt.show()
  2126.    
  2127.     return bo_array, np.array(new_labels)
  2128.  
  2129.  
  2130.  
  2131. def draw_molecule_asGraph(G,atomsymbols,elabel=True):
  2132.     ## Input: Graph with node attr 'atom_type', edge attr 'bond_order'
  2133.     ##              get the atomConnectivity as graph using
  2134.     ##              bfp.parsebondfile_asGraph function
  2135.     ## Draw graph with 'CHO' node and bond_order as edge label
  2136.    
  2137.     # Color mapping
  2138.     atom_type_color = {
  2139.         1: 'white',  # atom_type 1, white with black edge
  2140.         2: 'gray',   # atom_type 2, gray with black edge
  2141.         3: 'red'     # atom_type 3, red with black edge
  2142.     }
  2143.    
  2144.     # Apply the color mapping to each node
  2145.     node_colors = [atom_type_color[attr['atom_type']] for node, attr in G.nodes(data=True)]
  2146.    
  2147.     # Node labels: {node_id: atom_type}
  2148.     node_labels = {node: atomsymbols[attr['atom_type']-1] for node, attr in G.nodes(data=True)}
  2149.    
  2150.    
  2151.     # Position nodes using a layout
  2152.     pos = nx.spring_layout(G)
  2153.    
  2154.     # Draw the graph
  2155.     nx.draw(G, pos, with_labels=True, labels=node_labels,
  2156.             node_color=node_colors, edgecolors='black',node_size=500)
  2157.    
  2158.     if elabel:
  2159.         # Edge labels: {(u, v): bond_order}
  2160.         edge_labels = {(u, v): attr['bond_order'] for u, v, attr in G.edges(data=True)}
  2161.         # Draw edge labels (bond orders)
  2162.         nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement