Spaces:

alexsoleg
/

cartnet-demo

Sleeping

App Files Files Community

Àlex Solé committed on Jan 16

Commit

fa790e2

1 Parent(s): 4e099cc

fixed bug multiple structures csd

Browse files

Files changed (4) hide show

main.py +88 -69
main_local.py +76 -78
process.py +53 -54
utils.py +1 -1

main.py CHANGED Viewed

@@ -8,6 +8,7 @@ from models.master import create_model
 from process import process_data
 from utils import radius_graph_pbc
 import gc
 MEAN_TEMP = torch.tensor(192.1785) #training temp mean
 STD_TEMP = torch.tensor(81.2135) #training temp std
@@ -16,7 +17,7 @@ STD_TEMP = torch.tensor(81.2135) #training temp std
 @torch.no_grad()
 def main():
     model = create_model()
-    st.title("CartNet ADP Prediction")
     st.image('fig/pipeline.png')
     st.markdown("""
@@ -24,85 +25,101 @@ def main():
     """)
     uploaded_file = st.file_uploader("Upload a CIF file", type=["cif"], accept_multiple_files=False)
-    # uploaded_file = "ABABEM.cif"
     if uploaded_file is not None:
         try:
-            with open(uploaded_file.name, "wb") as f:
-                f.write(uploaded_file.getbuffer())
             filename = str(uploaded_file.name)
-            # Read the CIF file using ASE
-            atoms = read(filename, format="cif")
-            cif = ReadCif(filename)
-            cif_data = cif.first_block()
-            if "_diffrn_ambient_temperature" in cif_data.keys():
-                temperature = float(cif_data["_diffrn_ambient_temperature"].split("(")[0])
-            elif "_cell_measurement_temperature" in cif_data.keys():
-                temperature = float(cif_data["_cell_measurement_temperature"].split("(")[0])
-            else:
-                raise ValueError("Temperature not found in the CIF file. \
-                                    Please provide a temperature in the field _diffrn_ambient_temperature o in the field _cell_measurement_temperature from the CIF file.")
-            st.success("CIF file successfully read.")
-            data = Data()
-            data.x = torch.tensor(atoms.get_atomic_numbers(), dtype=torch.int32)
-            if len(atoms.positions) > 1000:
-                st.markdown("""
-                ⚠️ **Warning**: The structure is too large. Please upload a smaller one or use the [local implementation of CartNet Web App](https://github.com/alexsoleg/cartnet-streamlit/).
-                """)
-                raise ValueError("Please provide a structure with less than 1000 atoms in the unit cell.")
-            data.pos = torch.tensor(atoms.positions, dtype=torch.float32)
-            data.temperature_og = torch.tensor([temperature], dtype=torch.float32)
-            data.temperature = (data.temperature_og - MEAN_TEMP) / STD_TEMP
-            data.cell = torch.tensor(atoms.cell.array, dtype=torch.float32).unsqueeze(0)
-            data.pbc = torch.tensor([True, True, True])
-            data.natoms = len(atoms)
-            del atoms
-            gc.collect()
-            batch = Batch.from_data_list([data])
-            edge_index, _, _, edge_attr = radius_graph_pbc(batch, 5.0, 64)
-            del batch
-            gc.collect()
-            data.cart_dist = torch.norm(edge_attr, dim=-1)
-            data.cart_dir = torch.nn.functional.normalize(edge_attr, dim=-1)
-            data.edge_index = edge_index
-            data.non_H_mask = data.x != 1
-            delattr(data, "pbc")
-            delattr(data, "natoms")
-            batch = Batch.from_data_list([data])
-            del data, edge_index, edge_attr
-            gc.collect()
-            st.success("Graph successfully created.")
-            process_data(batch, model)
-            st.success("ADPs successfully predicted.")
-            # Create a download button for the processed CIF file
-            with open("output.cif", "r") as f:
-                cif_contents = f.read()
-            st.download_button(
-                label="Download processed CIF file",
-                data=cif_contents,
-                file_name="output.cif",
-                mime="text/plain"
-            )
-            os.remove("output.cif")
-            os.remove(filename)
             gc.collect()
         except Exception as e:
             st.error(f"An error occurred while reading the CIF file: {e}")
-    st.markdown("""
     ⚠️ **Warning**: This online web application is designed for structures with up to 1000 atoms in the unit cell. For larger structures, please use the [local implementation of CartNet Web App](https://github.com/alexsoleg/cartnet-streamlit/).
     """)
     st.markdown("""
     📌 The official implementation of the paper with all experiments can be found at [CartNet GitHub Repository](https://huggingface.co/spaces/alexsoleg/cartnet-demo/tree/main).
@@ -128,3 +145,5 @@ def main():
 if __name__ == "__main__":
     main()

 from process import process_data
 from utils import radius_graph_pbc
 import gc
+from io import BytesIO, StringIO
 MEAN_TEMP = torch.tensor(192.1785) #training temp mean
 STD_TEMP = torch.tensor(81.2135) #training temp std
 @torch.no_grad()
 def main():
     model = create_model()
+    st.title("CartNet Thermal Ellipsoid Prediction")
     st.image('fig/pipeline.png')
     st.markdown("""
     """)
     uploaded_file = st.file_uploader("Upload a CIF file", type=["cif"], accept_multiple_files=False)
     if uploaded_file is not None:
         try:
             filename = str(uploaded_file.name)
+            file = BytesIO(uploaded_file.getbuffer())
+            cif = ReadCif(file)
+            if len(cif.keys())>1:
+                st.warning("⚠️ **Warning**: Found " + str(len(cif.keys())) + " blocks in the CIF file. We will process all of them and export as separate CIF files.")
+            st.markdown(f"### CIF file: {filename}")
+            for key in cif.keys():
+                st.markdown(f"### Block: {key}")
+                try:
+                    block = "data_"+str(key)+"\n"+ cif[key].printsection()
+                    atoms = read(StringIO(block), format="cif")
+                    if len(atoms.positions) > 1000:
+                        st.error("""
+                        ⚠️ **Warning**: The structure is too large. Please upload a smaller one or use the [local implementation of CartNet Web App](https://github.com/alexsoleg/cartnet-streamlit/).
+                        """)
+                        continue
+                    cif_data = cif[key]
+                    if "_diffrn_ambient_temperature" in cif_data.keys():
+                        temperature = float(cif_data["_diffrn_ambient_temperature"].split("(")[0])
+                    elif "_cell_measurement_temperature" in cif_data.keys():
+                        temperature = float(cif_data["_cell_measurement_temperature"].split("(")[0])
+                    else:
+                        st.error("Temperature not found in the CIF file. \
+                                        Please provide a temperature in the field _diffrn_ambient_temperature o in the field _cell_measurement_temperature from the CIF file.")
+                        continue
+                    st.success("CIF file successfully read.")
+                except Exception as e:
+                    st.error(f"Error: {e}")
+                    st.error(f"We couldn't find any structure for the block {key}. Please make sure the cif is compatible with ASE. If the error message is a blank line, it means ASE didn't found any coordinates.")
+                    continue
+                data = Data()
+                data.x = torch.tensor(atoms.get_atomic_numbers(), dtype=torch.int32)
+                data.pos = torch.tensor(atoms.positions, dtype=torch.float32)
+                data.temperature_og = torch.tensor([temperature], dtype=torch.float32)
+                data.temperature = (data.temperature_og - MEAN_TEMP) / STD_TEMP
+                data.cell = torch.tensor(atoms.cell.array, dtype=torch.float32).unsqueeze(0)
+                data.pbc = torch.tensor([True, True, True])
+                data.natoms = len(atoms)
+                del atoms
+                gc.collect()
+                batch = Batch.from_data_list([data])
+                edge_index, _, _, edge_attr = radius_graph_pbc(batch, 5.0, 64)
+                del batch
+                gc.collect()
+                data.cart_dist = torch.norm(edge_attr, dim=-1)
+                data.cart_dir = torch.nn.functional.normalize(edge_attr, dim=-1)
+                data.edge_index = edge_index
+                data.non_H_mask = data.x != 1
+                delattr(data, "pbc")
+                delattr(data, "natoms")
+                batch = Batch.from_data_list([data])
+                del data, edge_index, edge_attr
+                gc.collect()
+                st.success("Graph successfully created.")
+                cif_file = process_data(batch, model)
+                st.success("ADPs successfully predicted.")
+                cif_file = BytesIO(cif_file.getvalue().encode())
+                st.download_button(
+                    label="Download processed CIF file",
+                    data=cif_file,
+                    file_name=f"output_{key}.cif",
+                    mime="text/plain",
+                    key=f"download_button_{key}"
+                )
+                gc.collect()
             gc.collect()
         except Exception as e:
             st.error(f"An error occurred while reading the CIF file: {e}")
+    st.warning("""
     ⚠️ **Warning**: This online web application is designed for structures with up to 1000 atoms in the unit cell. For larger structures, please use the [local implementation of CartNet Web App](https://github.com/alexsoleg/cartnet-streamlit/).
     """)
+    st.warning("""
+    ⚠️ **Warning**: We use [ASE library](https://wiki.fysik.dtu.dk/ase/) for reading the cif files, please make sure it is compatible.
+    """)
     st.markdown("""
     📌 The official implementation of the paper with all experiments can be found at [CartNet GitHub Repository](https://huggingface.co/spaces/alexsoleg/cartnet-demo/tree/main).
 if __name__ == "__main__":
     main()

main_local.py CHANGED Viewed

@@ -17,7 +17,7 @@ STD_TEMP = torch.tensor(81.2135) #training temp std
 @torch.no_grad()
 def main():
     model = create_model()
-    st.title("CartNet ADP Prediction")
     st.image('fig/pipeline.png')
     st.markdown("""
@@ -25,92 +25,86 @@ def main():
     """)
     uploaded_file = st.file_uploader("Upload a CIF file", type=["cif"], accept_multiple_files=False)
     if uploaded_file is not None:
         try:
             filename = str(uploaded_file.name)
             file = BytesIO(uploaded_file.getbuffer())
             cif = ReadCif(file)
-            print(cif.keys())
-            if len(cif.keys())>1:
-                st.markdown("Found " + str(len(cif.keys())) + " blocks in the CIF file. We will process all of them and export as separate CIF files.")
-            for key in cif.keys():
-                print(key)
-                # print(cif[key])
-                block = "data_"+str(key)+"\n"+ cif[key].printsection()
-                atoms = read(StringIO(block), format="cif")
-                print("atoms")
-                print(atoms)
-            # atoms = read(atoms_2, format="cif")
-            # with open(uploaded_file.name, "wb") as f:
-            #     f.write(uploaded_file.getbuffer())
-            # filename = str(uploaded_file.name)
-            # # Read the CIF file using ASE
-            # atoms = read(filename, format="cif")
-            # cif = ReadCif(filename)
-            # print(cif.keys())
-            # print(len(atoms))
-            # # st.markdown(cif)
-            # cif_data = cif
-            # st.markdown(f"### CIF file: {filename}")
-            # temperature = 100
-            # if "_diffrn_ambient_temperature" in cif_data.keys():
-            #     temperature = float(cif_data["_diffrn_ambient_temperature"].split("(")[0])
-            # elif "_cell_measurement_temperature" in cif_data.keys():
-            #     temperature = float(cif_data["_cell_measurement_temperature"].split("(")[0])
-            # else:
-            #     raise ValueError("Temperature not found in the CIF file. \
-            #                         Please provide a temperature in the field _diffrn_ambient_temperature o in the field _cell_measurement_temperature from the CIF file.")
-            # st.success("CIF file successfully read.")
-            # data = Data()
-            # data.x = torch.tensor(atoms.get_atomic_numbers(), dtype=torch.int32)
-            # data.pos = torch.tensor(atoms.positions, dtype=torch.float32)
-            # data.temperature_og = torch.tensor([temperature], dtype=torch.float32)
-            # data.temperature = (data.temperature_og - MEAN_TEMP) / STD_TEMP
-            # data.cell = torch.tensor(atoms.cell.array, dtype=torch.float32).unsqueeze(0)
-            # data.pbc = torch.tensor([True, True, True])
-            # data.natoms = len(atoms)
-            # del atoms
-            # gc.collect()
-            # batch = Batch.from_data_list([data])
-            # edge_index, _, _, edge_attr = radius_graph_pbc(batch, 5.0, 64)
-            # del batch
-            # gc.collect()
-            # data.cart_dist = torch.norm(edge_attr, dim=-1)
-            # data.cart_dir = torch.nn.functional.normalize(edge_attr, dim=-1)
-            # data.edge_index = edge_index
-            # data.non_H_mask = data.x != 1
-            # delattr(data, "pbc")
-            # delattr(data, "natoms")
-            # batch = Batch.from_data_list([data])
-            # del data, edge_index, edge_attr
-            # gc.collect()
-            # st.success("Graph successfully created.")
-            # process_data(batch, model)
-            # st.success("ADPs successfully predicted.")
-            # # Create a download button for the processed CIF file
-            # with open("output.cif", "r") as f:
-            #     cif_contents = f.read()
-            # st.download_button(
-            #     label="Download processed CIF file",
-            #     data=cif_contents,
-            #     file_name="output.cif",
-            #     mime="text/plain"
-            # )
-            # os.remove("output.cif")
-            # os.remove(filename)
-            # gc.collect()
         except Exception as e:
             st.error(f"An error occurred while reading the CIF file: {e}")
@@ -119,6 +113,10 @@ def main():
     📌 The official implementation of the paper with all experiments can be found at [CartNet GitHub Repository](https://github.com/imatge-upc/CartNet).
     """)
     st.markdown("""
     ### How to cite

 @torch.no_grad()
 def main():
     model = create_model()
+    st.title("CartNet Thermal Ellipsoid Prediction")
     st.image('fig/pipeline.png')
     st.markdown("""
     """)
     uploaded_file = st.file_uploader("Upload a CIF file", type=["cif"], accept_multiple_files=False)
     if uploaded_file is not None:
         try:
             filename = str(uploaded_file.name)
             file = BytesIO(uploaded_file.getbuffer())
             cif = ReadCif(file)
+            if len(cif.keys())>1:
+                st.warning("⚠️ **Warning**: Found " + str(len(cif.keys())) + " blocks in the CIF file. We will process all of them and export as separate CIF files.")
+            st.markdown(f"### CIF file: {filename}")
+            for key in cif.keys():
+                st.markdown(f"### Block: {key}")
+                try:
+                    block = "data_"+str(key)+"\n"+ cif[key].printsection()
+                    atoms = read(StringIO(block), format="cif")
+                    cif_data = cif[key]
+                    if "_diffrn_ambient_temperature" in cif_data.keys():
+                        temperature = float(cif_data["_diffrn_ambient_temperature"].split("(")[0])
+                    elif "_cell_measurement_temperature" in cif_data.keys():
+                        temperature = float(cif_data["_cell_measurement_temperature"].split("(")[0])
+                    else:
+                        st.error("Temperature not found in the CIF file. \
+                                        Please provide a temperature in the field _diffrn_ambient_temperature o in the field _cell_measurement_temperature from the CIF file.")
+                        continue
+                    st.success("CIF file successfully read.")
+                except Exception as e:
+                    st.error(f"Error: {e}")
+                    st.error(f"We couldn't find any structure for the block {key}. Please make sure the cif is compatible with ASE. If the error message is a blank line, it means ASE didn't found any coordinates.")
+                    continue
+                data = Data()
+                data.x = torch.tensor(atoms.get_atomic_numbers(), dtype=torch.int32)
+                data.pos = torch.tensor(atoms.positions, dtype=torch.float32)
+                data.temperature_og = torch.tensor([temperature], dtype=torch.float32)
+                data.temperature = (data.temperature_og - MEAN_TEMP) / STD_TEMP
+                data.cell = torch.tensor(atoms.cell.array, dtype=torch.float32).unsqueeze(0)
+                data.pbc = torch.tensor([True, True, True])
+                data.natoms = len(atoms)
+                del atoms
+                gc.collect()
+                batch = Batch.from_data_list([data])
+                edge_index, _, _, edge_attr = radius_graph_pbc(batch, 5.0, 64)
+                del batch
+                gc.collect()
+                data.cart_dist = torch.norm(edge_attr, dim=-1)
+                data.cart_dir = torch.nn.functional.normalize(edge_attr, dim=-1)
+                data.edge_index = edge_index
+                data.non_H_mask = data.x != 1
+                delattr(data, "pbc")
+                delattr(data, "natoms")
+                batch = Batch.from_data_list([data])
+                del data, edge_index, edge_attr
+                gc.collect()
+                st.success("Graph successfully created.")
+                cif_file = process_data(batch, model)
+                st.success("ADPs successfully predicted.")
+                cif_file = BytesIO(cif_file.getvalue().encode())
+                st.download_button(
+                    label="Download processed CIF file",
+                    data=cif_file,
+                    file_name=f"output_{key}.cif",
+                    mime="text/plain",
+                    key=f"download_button_{key}"
+                )
+                gc.collect()
+            gc.collect()
         except Exception as e:
             st.error(f"An error occurred while reading the CIF file: {e}")
     📌 The official implementation of the paper with all experiments can be found at [CartNet GitHub Repository](https://github.com/imatge-upc/CartNet).
     """)
+    st.warning("""
+    ⚠️ **Warning**: We use [ASE library](https://wiki.fysik.dtu.dk/ase/) for reading the cif files, please make sure it is compatible.
+    """)
     st.markdown("""
     ### How to cite

process.py CHANGED Viewed

@@ -2,6 +2,7 @@ import torch
 from ase.io import write
 from ase import Atoms
 import gc
 @torch.no_grad()
 def process_data(batch, model, output_file="output.cif"):
@@ -35,11 +36,12 @@ def process_data(batch, model, output_file="output.cif"):
     # Convert positions to fractional coordinates
     fractional_positions = ase_atoms.get_scaled_positions()
-    # Write to CIF file
-    write(output_file, ase_atoms)
-    with open(output_file, 'r') as file:
-        lines = file.readlines()
     # Find the line where "loop_" appears and remove lines from there to the end
     for i, line in enumerate(lines):
@@ -47,54 +49,51 @@ def process_data(batch, model, output_file="output.cif"):
             lines = lines[:i]
             break
-    # Write the modified lines to a new output file
-    with open(output_file, 'w') as file:
-        file.writelines(lines)
-    # Manually append positions and ADPs to the CIF file
-    with open(output_file, 'a') as cif_file:
-        # Write temperature
-        cif_file.write(f"\n_diffrn_ambient_temperature    {temperature}\n")
-        # Write atomic positions
-        cif_file.write("\nloop_\n")
-        cif_file.write("_atom_site_label\n")
-        cif_file.write("_atom_site_type_symbol\n")
-        cif_file.write("_atom_site_fract_x\n")
-        cif_file.write("_atom_site_fract_y\n")
-        cif_file.write("_atom_site_fract_z\n")
-        cif_file.write("_atom_site_U_iso_or_equiv\n")
-        cif_file.write("_atom_site_thermal_displace_type\n")
-        element_count = {}
-        for i, (atom_number, frac_pos) in enumerate(zip(atoms, fractional_positions)):
-            element = ase_atoms[i].symbol
-            assert atom_number == ase_atoms[i].number
-            if element not in element_count:
-                element_count[element] = 0
-            element_count[element] += 1
-            label = f"{element}{element_count[element]}"
-            u_iso = torch.trace(adps[indices[i]]).mean() if element != 'H' else 0.01
-            type = "Uani" if element != 'H' else "Uiso"
-            cif_file.write(f"{label} {element} {frac_pos[0]} {frac_pos[1]} {frac_pos[2]} {u_iso} {type}\n")
-        # Write ADPs
-        cif_file.write("\nloop_\n")
-        cif_file.write("_atom_site_aniso_label\n")
-        cif_file.write("_atom_site_aniso_U_11\n")
-        cif_file.write("_atom_site_aniso_U_22\n")
-        cif_file.write("_atom_site_aniso_U_33\n")
-        cif_file.write("_atom_site_aniso_U_23\n")
-        cif_file.write("_atom_site_aniso_U_13\n")
-        cif_file.write("_atom_site_aniso_U_12\n")
-        element_count = {}
-        for i, atom_number in enumerate(atoms):
-            if atom_number == 1:
-                continue
-            element = ase_atoms[i].symbol
-            if element not in element_count:
-                element_count[element] = 0
-            element_count[element] += 1
-            label = f"{element}{element_count[element]}"
-            cif_file.write(f"{label} {adps[indices[i],0,0]} {adps[indices[i],1,1]} {adps[indices[i],2,2]} {adps[indices[i],1,2]} {adps[indices[i],0,2]} {adps[indices[i],0,1]}\n")

 from ase.io import write
 from ase import Atoms
 import gc
+from io import BytesIO, StringIO
 @torch.no_grad()
 def process_data(batch, model, output_file="output.cif"):
     # Convert positions to fractional coordinates
     fractional_positions = ase_atoms.get_scaled_positions()
+    # Instead of reading from file, get CIF content directly from ASE's write function
+    cif_content = BytesIO()
+    write(cif_content, ase_atoms, format='cif')
+    lines = cif_content.getvalue().decode('utf-8').splitlines(True)
+    cif_content.close()
     # Find the line where "loop_" appears and remove lines from there to the end
     for i, line in enumerate(lines):
             lines = lines[:i]
             break
+    # Use StringIO to build the CIF content
+    cif_file = StringIO()
+    cif_file.writelines(lines)
+    # Write temperature
+    cif_file.write(f"\n_diffrn_ambient_temperature    {temperature}\n")
+    # Write atomic positions
+    cif_file.write("\nloop_\n")
+    cif_file.write("_atom_site_label\n")
+    cif_file.write("_atom_site_type_symbol\n")
+    cif_file.write("_atom_site_fract_x\n")
+    cif_file.write("_atom_site_fract_y\n")
+    cif_file.write("_atom_site_fract_z\n")
+    cif_file.write("_atom_site_U_iso_or_equiv\n")
+    cif_file.write("_atom_site_thermal_displace_type\n")
+    element_count = {}
+    for i, (atom_number, frac_pos) in enumerate(zip(atoms, fractional_positions)):
+        element = ase_atoms[i].symbol
+        assert atom_number == ase_atoms[i].number
+        if element not in element_count:
+            element_count[element] = 0
+        element_count[element] += 1
+        label = f"{element}{element_count[element]}"
+        u_iso = torch.trace(adps[indices[i]]).mean() if element != 'H' else 0.01
+        type = "Uani" if element != 'H' else "Uiso"
+        cif_file.write(f"{label} {element} {frac_pos[0]} {frac_pos[1]} {frac_pos[2]} {u_iso} {type}\n")
+    # Write ADPs
+    cif_file.write("\nloop_\n")
+    cif_file.write("_atom_site_aniso_label\n")
+    cif_file.write("_atom_site_aniso_U_11\n")
+    cif_file.write("_atom_site_aniso_U_22\n")
+    cif_file.write("_atom_site_aniso_U_33\n")
+    cif_file.write("_atom_site_aniso_U_23\n")
+    cif_file.write("_atom_site_aniso_U_13\n")
+    cif_file.write("_atom_site_aniso_U_12\n")
+    element_count = {}
+    for i, atom_number in enumerate(atoms):
+        if atom_number == 1:
+            continue
+        element = ase_atoms[i].symbol
+        if element not in element_count:
+            element_count[element] = 0
+        element_count[element] += 1
+        label = f"{element}{element_count[element]}"
+        cif_file.write(f"{label} {adps[indices[i],0,0]} {adps[indices[i],1,1]} {adps[indices[i],2,2]} {adps[indices[i],1,2]} {adps[indices[i],0,2]} {adps[indices[i],0,1]}\n")
+    return cif_file

utils.py CHANGED Viewed

@@ -264,7 +264,7 @@ def get_max_neighbors_mask(
         + torch.arange(len(index), device=device)
         - index_neighbor_offset_expand
     )
-    print(index_sort_map.dtype, atom_distance.dtype)
     distance_sort.index_copy_(0, index_sort_map, atom_distance)
     distance_sort = distance_sort.view(num_atoms, max_num_neighbors)

         + torch.arange(len(index), device=device)
         - index_neighbor_offset_expand
     )
     distance_sort.index_copy_(0, index_sort_map, atom_distance)
     distance_sort = distance_sort.view(num_atoms, max_num_neighbors)