diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..b77910457e1b166a73f06a739fbdf9bdd2c3e612 --- /dev/null +++ b/.gitignore @@ -0,0 +1,180 @@ +logs/ +lightning_logs/ +wandb/ +Data/ +wandb*.json +**/*.ckpt +**/*.pth +**/*.json +.vscode/ +**/*.pdb +ckpt/ +*.code-workspace +outputs/ +**/*.txt +**/lightning_logs/ +**/inference_outputs/ +.hydra +preprocessed/ +misc/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..b36806447dfa12c1a69b0c28a63e29291cdc0410 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Cedlijh + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 7be5fc7f47d5db027d120b8024982df93db95b74..7ff660efa331083ac9b0697a293905b5e35289d0 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,106 @@ ---- -license: mit ---- +# PepFlow: Full-Atom Peptide Design + +![alt text](teaser.png) + + +This repository contains the official implementation of 💡 Full-Atom Peptide Design based on Multi-modal Flow Matching (ICML 2024). + +You can find our [paper](https://arxiv.org/abs/2406.00735) here. We also appreciate the inspiration from [diffab](https://github.com/luost26/diffab) and [frameflow](https://github.com/microsoft/protein-frame-flow). + +If you have any questions, please contact lijiahanypc@pku.edu.cn or ced3ljhypc@gmail.com. 
Thank you! :) + +## Install + + +### Environment + +Please adjust the CUDA and torch versions to match your machine. We tested our code on CUDA >= 11.7. We also suggest using [micromamba](https://mamba.readthedocs.io/en/latest/installation/micromamba-installation.html) as a replacement for conda. + +```bash +conda env create -f environment.yml # or use micromamba instead of conda + +conda activate flow + +pip install torch-scatter -f https://data.pyg.org/whl/torch-2.0.0+cu117.html + +pip install joblib lmdb easydict + +``` + +### Clone Repo + +```bash +git clone https://github.com/Ced3-han/PepFlowww.git +``` + +We suggest adding the code to the ```PYTHONPATH``` environment variable, or you can install it with setuptools. + + ```bash +export PYTHONPATH=$(pwd):$PYTHONPATH +python setup.py develop + ``` + + +### Data and Weights Download + +We provide data and pretrained model weights [here](https://drive.google.com/drive/folders/1bHaKDF3uCDPtfsihjZs0zmjwF6UU1uVl?usp=sharing). + ++ PepMerge_release.zip: 1.2GB ++ PepMerge_lmdb.zip: 180MB ++ model1.pt: 80MB ++ model2.pt: 80MB + +The ```PepMerge_release.zip``` contains filtered data of peptide-receptor pairs. For example, in the folder ```1a0n_A```, the ```P``` chain in the PDB file ```1a0n``` is the peptide. In this folder, we provide the FASTA and PDB files of the peptide and receptor. The suffix ```_merge``` means the peptide and receptor are in the same PDB file. We also extract the binding pocket of the receptor, where our model is trained to generate peptides based on the binding pocket. You can also download [PepBDB](http://huanglab.phys.hust.edu.cn/pepbdb/db/1cta_A/) and [QBioLip](https://yanglab.qd.sdu.edu.cn/Q-BioLiP/Download), and use ```playgrounds/gen_dataset.ipynb``` to reproduce the dataset. + +The ```PepMerge_lmdb.zip``` contains several different splits of the dataset. We use ```mmseqs2``` to cluster complexes based on receptor sequence identity. See ```playgrounds/cluster.ipynb``` for details. 
The ```names.txt``` file contains the names of complexes in the test set. You can use ```models_con/pep_dataloader.py``` to load these datasets. We suggest putting these LMDBs in a single ```Data``` folder. + +Besides, ```model1.pt``` and ```model2.pt``` are two checkpoints that you can load using ```models_con/flow_model.py``` together with the config file ```configs/learn_angle.yaml```. We suggest using model1 for benchmark evaluation and model2 for real-world peptide design tasks; the latter is trained on a larger dataset. + + +## Usage + +We will add more user-friendly, straightforward pipelines (generation and evaluation) later. + +### Inference and Generation + +By default, we support sampling of generated peptides from our processed dataset. You can use ```models_con/sample.py``` to sample, and ```models_con/inference.py``` to reconstruct PDB files. + +If you want to use your own data, you can organize your data (peptide and pocket) as we did in PepMerge_release and construct a dataset for sampling and reconstruction. You can also use ```models_con/pep_dataloader/preprocess_structure``` to parse a single data point. + + + + +### Evaluation + +Our evaluation involves many third-party packages, and we include some useful evaluation scripts in ```eval```. Please refer to our paper for details and download the corresponding packages for evaluation. Please use different Python environments for these tools. + + + +### Train + +You can use ```train.py``` for single-GPU training and ```train_ddp.py``` for multi-GPU training. + + +## Future Work + + +Future improvements on peptide generation models may include chemical modifications, non-canonical amino acids, pretraining on larger datasets, language models, better sampling methods, etc. Stay tuned and feel free to contact us for collaboration and discussion! 
+ + + +## Reference + +```bibtex +@InProceedings{pmlr-v235-li24o, + title={Full-Atom Peptide Design based on Multi-modal Flow Matching}, + author={Li, Jiahan and Cheng, Chaoran and Wu, Zuofan and Guo, Ruihan and Luo, Shitong and Ren, Zhizhou and Peng, Jian and Ma, Jianzhu}, + booktitle={Proceedings of the 41st International Conference on Machine Learning}, + pages={27615--27640}, + year={2024}, + editor={Salakhutdinov, Ruslan and Kolter, Zico and Heller, Katherine and Weller, Adrian and Oliver, Nuria and Scarlett, Jonathan and Berkenkamp, Felix}, + volume={235}, + series={Proceedings of Machine Learning Research}, + month={21--27 Jul}, + publisher={PMLR}, +} +``` \ No newline at end of file diff --git a/configs/learn_angle.yaml b/configs/learn_angle.yaml new file mode 100644 index 0000000000000000000000000000000000000000..31aafe14ebb7f2b22b8ba0034d29a8b46378ba61 --- /dev/null +++ b/configs/learn_angle.yaml @@ -0,0 +1,74 @@ +model: + encoder: + node_embed_size: 128 + edge_embed_size: 64 + ipa: + c_s: 128 #${model.node_embed_size} + c_z: 64 #${model.edge_embed_size} + c_hidden: 128 + no_heads: 8 + no_qk_points: 8 + no_v_points: 12 + seq_tfmr_num_heads: 4 + seq_tfmr_num_layers: 2 + num_blocks: 6 + stop_grad: False + interpolant: + min_t: 1.e-2 + t_normalization_clip: 0.9 + sample_sequence: True + sample_structure: True + rots: + train_schedule: linear + sample_schedule: exp + exp_rate: 10 + trans: + train_schedule: linear + sample_schedule: linear + sigma: 1.0 + seqs: + num_classes: 20 + simplex_value: 5.0 + sampling: + num_timesteps: 100 + self_condition: False + +train: + loss_weights: + trans_loss: 0.5 # 1.0 for dreamfold, 0.05 for yim + rot_loss: 0.5 # 1.0 for dreamfold, 0.5 for yim + bb_atom_loss: 0.25 + seqs_loss: 1.0 + angle_loss: 1.0 + torsion_loss: 0.5 + max_iters: 400000000 + val_freq: 20000 + batch_size: 32 + accum_grad: 1 + seed: 114514 + max_grad_norm: 100.0 + optimizer: + type: adam + lr: 5.e-4 #1.e-4 + weight_decay: 0.0 + beta1: 0.9 + beta2: 0.999 + 
scheduler: + type: plateau + factor: 0.8 + patience: 10 + min_lr: 5.e-6 + +dataset: + train: + type: peprec + structure_dir: /datapool/data2/home/jiahan/Data/PepMerge_new/ + dataset_dir: /datapool/data2/home/jiahan/ResProj/PepDiff/frame-flow/Data/Fixed Data + name: pep_pocket_train + reset: False + val: + type: peprec + structure_dir: /datapool/data2/home/jiahan/Data/PepMerge_new/ + dataset_dir: /datapool/data2/home/jiahan/ResProj/PepDiff/frame-flow/Data/Fixed Data + name: pep_pocket_test + reset: False \ No newline at end of file diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000000000000000000000000000000000000..abfe151f4d26b74fab7aa0ac5ed1b319b1ca1d60 --- /dev/null +++ b/environment.yml @@ -0,0 +1,261 @@ +name: flow +channels: +- conda-forge +- nvidia +- pytorch +dependencies: +- _libgcc_mutex==0.1=conda_forge +- _openmp_mutex==4.5=2_gnu +- anyio==3.7.1=pyhd8ed1ab_0 +- argon2-cffi==21.3.0=pyhd8ed1ab_0 +- argon2-cffi-bindings==21.2.0=py310h5764c6d_3 +- arrow==1.2.3=pyhd8ed1ab_0 +- asttokens==2.2.1=pyhd8ed1ab_0 +- astunparse==1.6.3=pyhd8ed1ab_0 +- async-lru==2.0.4=pyhd8ed1ab_0 +- attrs==23.1.0=pyh71513ae_1 +- babel==2.12.1=pyhd8ed1ab_1 +- backcall==0.2.0=pyh9f0ad1d_0 +- backports==1.0=pyhd8ed1ab_3 +- backports.functools_lru_cache==1.6.5=pyhd8ed1ab_0 +- beautifulsoup4==4.12.2=pyha770c72_0 +- biopython==1.81=py310h1fa729e_0 +- biotite==0.38.0 +- bleach==6.0.0=pyhd8ed1ab_0 +- blosc==1.21.4=h0f2a231_0 +- brotli==1.0.9=h166bdaf_9 +- brotli-bin==1.0.9=h166bdaf_9 +- brotli-python==1.0.9=py310hd8f1fbe_9 +- bzip2==1.0.8=h7f98852_4 +- c-ares==1.19.1=hd590300_0 +- c-blosc2==2.10.2=hb4ffafa_0 +- ca-certificates==2023.7.22=hbcca054_0 +- cached-property==1.5.2=hd8ed1ab_1 +- cached_property==1.5.2=pyha770c72_1 +- certifi==2023.7.22=pyhd8ed1ab_0 +- cffi==1.15.1=py310h255011f_3 +- charset-normalizer==3.2.0=pyhd8ed1ab_0 +- comm==0.1.4=pyhd8ed1ab_0 +- contourpy==1.1.0=py310hd41b1e2_0 +- cuda==11.6.0=0 +- cuda-cccl==11.6.55=hf6102b2_0 +- 
cuda-command-line-tools==11.6.2=0 +- cuda-compiler==11.6.2=0 +- cuda-cudart==11.6.55=he381448_0 +- cuda-cudart-dev==11.6.55=h42ad0f4_0 +- cuda-cuobjdump==11.6.124=h2eeebcb_0 +- cuda-cupti==11.6.124=h86345e5_0 +- cuda-cuxxfilt==11.6.124=hecbf4f6_0 +- cuda-driver-dev==11.6.55=0 +- cuda-gdb==12.0.90=hd47b8d6_0 +- cuda-libraries==11.6.2=0 +- cuda-libraries-dev==11.6.0=0 +- cuda-memcheck==11.8.86=0 +- cuda-nsight==12.0.78=ha770c72_0 +- cuda-nsight-compute==12.2.1=0 +- cuda-nvcc==11.6.124=hbba6d2d_0 +- cuda-nvdisasm==12.0.76=h59595ed_0 +- cuda-nvml-dev==11.6.55=haa9ef22_0 +- cuda-nvprof==12.0.90=h59595ed_0 +- cuda-nvprune==11.6.124=he22ec0a_0 +- cuda-nvrtc==11.6.124=h020bade_0 +- cuda-nvrtc-dev==11.6.124=h249d397_0 +- cuda-nvtx==11.6.124=h0630a44_0 +- cuda-nvvp==12.0.90=h59595ed_0 +- cuda-runtime==11.6.2=0 +- cuda-samples==11.6.101=h8efea70_0 +- cuda-sanitizer-api==12.0.90=h59595ed_0 +- cuda-toolkit==11.6.0=0 +- cuda-tools==11.6.0=0 +- cuda-version==12.0=hffde075_2 +- cuda-visual-tools==11.6.0=0 +- cycler==0.11.0=pyhd8ed1ab_0 +- debugpy==1.6.8=py310hc6cd4ac_0 +- decorator==5.1.1=pyhd8ed1ab_0 +- defusedxml==0.7.1=pyhd8ed1ab_0 +- entrypoints==0.4=pyhd8ed1ab_0 +- exceptiongroup==1.1.3=pyhd8ed1ab_0 +- executing==1.2.0=pyhd8ed1ab_0 +- flit-core==3.9.0=pyhd8ed1ab_0 +- fonttools==4.42.0=py310h2372a71_0 +- fqdn==1.5.1=pyhd8ed1ab_0 +- freetype==2.12.1=hca18f0e_1 +- gds-tools==1.5.0.59=hcb278e6_0 +- gmp==6.2.1=h58526e2_0 +- hdf5==1.14.1=nompi_h4f84152_100 +- idna==3.4=pyhd8ed1ab_0 +- importlib-metadata==6.8.0=pyha770c72_0 +- importlib_metadata==6.8.0=hd8ed1ab_0 +- importlib_resources==6.0.1=pyhd8ed1ab_0 +- ipykernel==6.25.1=pyh71e2992_0 +- ipython==8.14.0=pyh41d4057_0 +- isoduration==20.11.0=pyhd8ed1ab_0 +- jedi==0.19.0=pyhd8ed1ab_0 +- jinja2==3.1.2=pyhd8ed1ab_1 +- json5==0.9.14=pyhd8ed1ab_0 +- jsonpointer==2.0=py_0 +- jsonschema==4.19.0=pyhd8ed1ab_1 +- jsonschema-specifications==2023.7.1=pyhd8ed1ab_0 +- jsonschema-with-format-nongpl==4.19.0=pyhd8ed1ab_1 +- 
jupyter-lsp==2.2.0=pyhd8ed1ab_0 +- jupyter_client==8.3.0=pyhd8ed1ab_0 +- jupyter_core==5.3.1=py310hff52083_0 +- jupyter_events==0.7.0=pyhd8ed1ab_2 +- jupyter_server==2.7.1=pyhd8ed1ab_0 +- jupyter_server_terminals==0.4.4=pyhd8ed1ab_1 +- jupyterlab==4.0.5=pyhd8ed1ab_0 +- jupyterlab_pygments==0.2.2=pyhd8ed1ab_0 +- jupyterlab_server==2.24.0=pyhd8ed1ab_0 +- keyutils==1.6.1=h166bdaf_0 +- kiwisolver==1.4.4=py310hbf28c38_1 +- krb5==1.21.2=h659d440_0 +- lcms2==2.15=haa2dc70_1 +- ld_impl_linux-64==2.40=h41732ed_0 +- lerc==4.0.0=h27087fc_0 +- libaec==1.0.6=hcb278e6_1 +- libblas==3.9.0=17_linux64_openblas +- libbrotlicommon==1.0.9=h166bdaf_9 +- libbrotlidec==1.0.9=h166bdaf_9 +- libbrotlienc==1.0.9=h166bdaf_9 +- libcblas==3.9.0=17_linux64_openblas +- libcublas==12.0.1.189=hcb278e6_2 +- libcublas-dev==12.0.1.189=hcb278e6_2 +- libcufft==11.0.0.21=hcb278e6_1 +- libcufft-dev==11.0.0.21=hcb278e6_1 +- libcufile==1.5.0.59=hcb278e6_0 +- libcufile-dev==1.5.0.59=hcb278e6_0 +- libcurand==10.3.1.50=hcb278e6_0 +- libcurand-dev==10.3.1.50=hcb278e6_0 +- libcurl==8.2.1=hca28451_0 +- libcusolver==11.4.2.57=hcb278e6_1 +- libcusparse==12.0.0.76=hcb278e6_1 +- libdeflate==1.18=h0b41bf4_0 +- libedit==3.1.20191231=he28a2e2_2 +- libev==4.33=h516909a_1 +- libffi==3.4.2=h7f98852_5 +- libgcc-ng==13.1.0=he5830b7_0 +- libgfortran-ng==13.1.0=h69a702a_0 +- libgfortran5==13.1.0=h15d22d2_0 +- libgomp==13.1.0=he5830b7_0 +- libjpeg-turbo==2.1.5.1=h0b41bf4_0 +- liblapack==3.9.0=17_linux64_openblas +- libnghttp2==1.52.0=h61bc06f_0 +- libnpp==12.0.0.30=h59595ed_0 +- libnpp-dev==12.0.0.30=h59595ed_0 +- libnsl==2.0.0=h7f98852_0 +- libnuma==2.0.16=h0b41bf4_1 +- libnvjitlink==12.0.76=hcb278e6_1 +- libnvjpeg==12.0.0.28=hcb278e6_0 +- libnvjpeg-dev==12.0.0.28=ha770c72_0 +- libopenblas==0.3.23=pthreads_h80387f5_0 +- libpng==1.6.39=h753d276_0 +- libsodium==1.0.18=h36c2ea0_1 +- libsqlite==3.42.0=h2797004_0 +- libssh2==1.11.0=h0841786_0 +- libstdcxx-ng==13.1.0=hfd8a6a1_0 +- libtiff==4.5.1=h8b53f26_0 +- 
libuuid==2.38.1=h0b41bf4_0 +- libwebp-base==1.3.1=hd590300_0 +- libxcb==1.15=h0b41bf4_0 +- libzlib==1.2.13=hd590300_5 +- lz4-c==1.9.4=hcb278e6_0 +- lzo==2.10=h516909a_1000 +- markupsafe==2.1.3=py310h2372a71_0 +- matplotlib-base==3.7.2=py310hf38f957_0 +- matplotlib-inline==0.1.6=pyhd8ed1ab_0 +- mdtraj==1.9.9=py310h8e08b51_0 +- mistune==3.0.1=pyhd8ed1ab_0 +- munkres==1.1.4=pyh9f0ad1d_0 +- nbclient==0.8.0=pyhd8ed1ab_0 +- nbconvert-core==7.7.4=pyhd8ed1ab_0 +- nbformat==5.9.2=pyhd8ed1ab_0 +- ncurses==6.4=hcb278e6_0 +- nest-asyncio==1.5.6=pyhd8ed1ab_0 +- nomkl==1.0=h5ca1d4c_0 +- notebook-shim==0.2.3=pyhd8ed1ab_0 +- nsight-compute==2023.2.1.3=0 +- numexpr==2.8.4=py310hd91493a_101 +- numpy==1.25.2=py310ha4c1d20_0 +- openjpeg==2.5.0=hfec8fc6_2 +- openssl==3.1.2=hd590300_0 +- overrides==7.4.0=pyhd8ed1ab_0 +- packaging==23.1=pyhd8ed1ab_0 +- pandas==2.0.3=py310h7cbd5c2_1 +- pandocfilters==1.5.0=pyhd8ed1ab_0 +- parso==0.8.3=pyhd8ed1ab_0 +- patsy==0.5.3=pyhd8ed1ab_0 +- pexpect==4.8.0=pyh1a96a4e_2 +- pickleshare==0.7.5=py_1003 +- pillow==10.0.0=py310h582fbeb_0 +- pip +- pkgutil-resolve-name==1.3.10=pyhd8ed1ab_0 +- platformdirs==3.10.0=pyhd8ed1ab_0 +- pooch==1.7.0=pyha770c72_3 +- prometheus_client==0.17.1=pyhd8ed1ab_0 +- prompt-toolkit==3.0.39=pyha770c72_0 +- prompt_toolkit==3.0.39=hd8ed1ab_0 +- psutil==5.9.5=py310h1fa729e_0 +- pthread-stubs==0.4=h36c2ea0_1001 +- ptyprocess==0.7.0=pyhd3deb0d_0 +- pure_eval==0.2.2=pyhd8ed1ab_0 +- py-cpuinfo==9.0.0=pyhd8ed1ab_0 +- pycparser==2.21=pyhd8ed1ab_0 +- pygments==2.16.1=pyhd8ed1ab_0 +- pyparsing==3.0.9=pyhd8ed1ab_0 +- pysocks==1.7.1=pyha2e5f31_6 +- pytables==3.8.0=py310ha028ce3_2 +- python==3.10.12=hd12c33a_0_cpython +- python-dateutil==2.8.2=pyhd8ed1ab_0 +- python-fastjsonschema==2.18.0=pyhd8ed1ab_0 +- python-json-logger==2.0.7=pyhd8ed1ab_0 +- python-tzdata==2023.3=pyhd8ed1ab_0 +- python_abi==3.10=3_cp310 +- pytorch-cuda==11.6=h867d48c_0 +- pytz==2023.3=pyhd8ed1ab_0 +- pyyaml==6.0=py310h5764c6d_5 +- pyzmq==25.1.1=py310h5bbb5d0_0 +- 
readline==8.2=h8228510_1 +- referencing==0.30.2=pyhd8ed1ab_0 +- requests==2.31.0=pyhd8ed1ab_0 +- rfc3339-validator==0.1.4=pyhd8ed1ab_0 +- rfc3986-validator==0.1.1=pyh9f0ad1d_0 +- rpds-py==0.9.2=py310hcb5633a_0 +- scipy==1.11.1=py310ha4c1d20_0 +- seaborn==0.12.2=hd8ed1ab_0 +- seaborn-base==0.12.2=pyhd8ed1ab_0 +- send2trash==1.8.2=pyh41d4057_0 +- setuptools==68.1.2=pyhd8ed1ab_0 +- six==1.16.0=pyh6c4a22f_0 +- snappy==1.1.10=h9fff704_0 +- sniffio==1.3.0=pyhd8ed1ab_0 +- soupsieve==2.3.2.post1=pyhd8ed1ab_0 +- stack_data==0.6.2=pyhd8ed1ab_0 +- statsmodels==0.14.0=py310h278f3c1_1 +- terminado==0.17.1=pyh41d4057_0 +- tinycss2==1.2.1=pyhd8ed1ab_0 +- tk==8.6.12=h27826a3_0 +- tomli==2.0.1=pyhd8ed1ab_0 +- tornado==6.3.3=py310h2372a71_0 +- traitlets==5.9.0=pyhd8ed1ab_0 +- typing-extensions==4.7.1=hd8ed1ab_0 +- typing_extensions==4.7.1=pyha770c72_0 +- typing_utils==0.1.0=pyhd8ed1ab_0 +- tzdata==2023c=h71feb2d_0 +- unicodedata2==15.0.0=py310h5764c6d_0 +- uri-template==1.3.0=pyhd8ed1ab_0 +- urllib3==2.0.4=pyhd8ed1ab_0 +- wcwidth==0.2.6=pyhd8ed1ab_0 +- webcolors==1.13=pyhd8ed1ab_0 +- webencodings==0.5.1=py_1 +- websocket-client==1.6.1=pyhd8ed1ab_0 +- wheel==0.41.1=pyhd8ed1ab_0 +- xorg-libxau==1.0.11=hd590300_0 +- xorg-libxdmcp==1.1.3=h7f98852_0 +- xz==5.2.6=h166bdaf_0 +- yaml==0.2.5=h7f98852_2 +- zeromq==4.3.4=h9c3ff4c_1 +- zipp==3.16.2=pyhd8ed1ab_0 +- zlib==1.2.13=hd590300_5 +- zlib-ng==2.0.7=h0b41bf4_0 +- zstd==1.5.2=hfc55251_7 + diff --git a/eval/align.py b/eval/align.py new file mode 100644 index 0000000000000000000000000000000000000000..9337214774843fed0109c2b34361db27f9b95adc --- /dev/null +++ b/eval/align.py @@ -0,0 +1,17 @@ +import subprocess +import re +from tqdm import tqdm +import os + + +RUNNER = '/datapool/data2/home/jiahan/Tool/TMalign-20180426/MMalign' + +def align_pdb(pdb1,pdb2,pdb1_out): + subprocess.run([RUNNER,pdb1,pdb2,'-o',pdb1_out],stdout=subprocess.PIPE) + +def get_tm_score(pdb1,pdb2): + cmd = subprocess.run(['TMscore',pdb1,pdb2],stdout=subprocess.PIPE) + out 
= cmd.stdout.decode() + tm_score = re.search(r"TM-score\s+=\s+(\d+\.\d+)", out) + rmsd = re.search(r"RMSD of the common residues=\s+(\d+\.\d+)", out) + return float(rmsd.group(1)),float(tm_score.group(1)) \ No newline at end of file diff --git a/eval/energy.py b/eval/energy.py new file mode 100644 index 0000000000000000000000000000000000000000..7cb307e831a13d893d54fc905ecaafee5fbe757d --- /dev/null +++ b/eval/energy.py @@ -0,0 +1,94 @@ +import pyrosetta +from pyrosetta import init, pose_from_pdb, get_fa_scorefxn +from pyrosetta.rosetta.protocols.relax import FastRelax +from pyrosetta.rosetta.protocols.analysis import InterfaceAnalyzerMover +from pyrosetta.rosetta.core.pack.task import TaskFactory +from pyrosetta.rosetta.core.pack.task.operation import RestrictToRepacking +from pyrosetta.rosetta.protocols.minimization_packing import PackRotamersMover + +import os +import pandas as pd +import subprocess +import numpy as np +import shutil +from tqdm import tqdm +import pickle + +from joblib import delayed, Parallel +from utils import * + +input_dir=".Tests" +output_dir="./Pack" + +def get_chain_dic(input_pdb): + parser = PDBParser() + structure = parser.get_structure("protein", input_pdb) + chain_dic = {} + for model in structure: + for chain in model: + chain_dic[chain.id] = len([res for res in chain if is_aa(res) and res.has_id('CA')]) + + return chain_dic + +def get_rosetta_score_base(pdb_path,chain_id='A'): + try: + init() + pose = pyrosetta.pose_from_pdb(pdb_path) + chains = list(get_chain_dic(pdb_path).keys()) + chains.remove(chain_id) + interface = f'{chain_id}_{"".join(chains)}' + fast_relax = FastRelax() # cant be pickled + scorefxn = get_fa_scorefxn() + fast_relax.set_scorefxn(scorefxn) + mover = InterfaceAnalyzerMover(interface) + mover.set_pack_separated(True) + stabs,binds = [],[] + for i in range(5): + fast_relax.apply(pose) + stab = scorefxn(pose) + mover.apply(pose) + bind = pose.scores['dG_separated'] + stabs.append(stab) + binds.append(bind) + return 
{'name':pdb_path,'stab':np.array(stabs).mean(),'bind':np.array(binds).mean()} + except: + return {'name':pdb_path,'stab':999.0,'bind':999.0} + + +def get_rosetta_score(pdb_path,chain='A'): + try: + init() + pose = pyrosetta.pose_from_pdb(pdb_path) + # chains = list(get_chain_dic(os.path.join(input_dir,name,'pocket_merge_renum.pdb')).keys()) + # chains.remove(chain) + # interface = f'{chain}_{"".join(chains)}' + interface='A_B' + fast_relax = FastRelax() # cant be pickled + scorefxn = get_fa_scorefxn() + fast_relax.set_scorefxn(scorefxn) + mover = InterfaceAnalyzerMover(interface) + mover.set_pack_separated(True) + fast_relax.apply(pose) + energy = scorefxn(pose) + mover.apply(pose) + dg = pose.scores['dG_separated'] + return [pdb_path,energy,dg] + except: + return [pdb_path,999.0,999.0] + +def pack_sc(name='1a1m_C',num_samples=10): + try: + if os.path.exists(os.path.join(output_dir,name,'rosetta')): + shutil.rmtree(os.path.join(output_dir,name,'rosetta')) + os.makedirs(os.path.join(output_dir,name,'rosetta'),exist_ok=True) + init() + tf = TaskFactory() + tf.push_back(RestrictToRepacking()) # Only repack, don't change amino acid types + packer = PackRotamersMover() + packer.task_factory(tf) + for i in range(num_samples): + pose = pose_from_pdb(os.path.join(input_dir,name,f'pocket_merge_renum_bb.pdb')) + packer.apply(pose) + pose.dump_pdb(os.path.join(output_dir,name,'rosetta',f'packed_{i}.pdb')) + except: + return None \ No newline at end of file diff --git a/eval/foldx.py b/eval/foldx.py new file mode 100644 index 0000000000000000000000000000000000000000..f3f44f4e0dcfe17955dab180305cc69188e74a4b --- /dev/null +++ b/eval/foldx.py @@ -0,0 +1,77 @@ +import pandas as pd +import numpy as np +import pandas as pd +import torch +from joblib import Parallel, delayed +from tqdm import tqdm +import tempfile +import os +import shutil +import subprocess + +from Bio.PDB import PDBParser + +def fetch_stability_score(path): + u = pd.read_csv(path, sep='\t', header=None) + return 
u.values[0][1] + +def fetch_binding_affinity(path): + with open(path, 'r') as f: + u = f.readlines() + return float(u[-1].split("\t")[-3]) + +class FoldXSession(object): + def __init__(self): + super().__init__() + self.tmpdir = tempfile.TemporaryDirectory() + self.pdb_names = [] + + def cleanup(self): + self.tmpdir.cleanup() + self.tmpdir = None + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.cleanup() + + @property + def workdir(self): + return self.tmpdir.name + + def path(self, filename): + return os.path.join(self.workdir, filename) + + def preprocess_data(self, pdb_dir, pdb_name): + shutil.copy(os.path.join(pdb_dir, pdb_name), self.path(pdb_name)) + return self.path(pdb_name) + +def get_chain_names(pdb_dir,pdb_name): + pep_chain = pdb_name.split("_")[-1][0] + parser = PDBParser() + structure = parser.get_structure("name", os.path.join(pdb_dir,pdb_name)) + chain_names = [chain.get_id() for model in structure for chain in model] + chains = f"{pep_chain}," + for chain in chain_names: + if chain != pep_chain: + chains += f"{chain}" + return chains + +def process_one_file(pdb_dir,pdb_name): + chains = get_chain_names(pdb_dir,pdb_name) + with FoldXSession() as session: + try: + # print(session.workdir) + session.preprocess_data(pdb_dir, pdb_name) + assert(os.path.exists(session.path(pdb_name))) + # print(os.listdir(session.workdir)) + ret = subprocess.run(['/datapool/data2/home/ruihan/bin/foldx', '--command='+'AnalyseComplex', '--pdb='+pdb_name, f'--analyseComplexChains={chains}'], cwd=session.workdir, stdout=None) + fxout_path = session.path(f'Summary_{pdb_name.split(".")[0]}_AC.fxout') + assert(os.path.exists(fxout_path)) + return (pdb_name.split('.')[0],fetch_binding_affinity(fxout_path)) + except: + print(f"Error in {pdb_name}") + print(os.path.exists(fxout_path)) + return (pdb_name.split('.')[0],None) + \ No newline at end of file diff --git a/eval/geometry.py b/eval/geometry.py new file mode 100644 index 
0000000000000000000000000000000000000000..f0df563260a1f0ce98f667eb9d932cbc30ed9c5d --- /dev/null +++ b/eval/geometry.py @@ -0,0 +1,127 @@ +from Bio.PDB import PDBParser, Superimposer, is_aa, Select, NeighborSearch +import tmtools +import os +import numpy as np +import mdtraj as md +from Bio.SeqUtils import seq1 + +import warnings +from Bio import BiopythonWarning, SeqIO + +import difflib +import torch + +# 忽略PDBConstructionWarning +warnings.filterwarnings('ignore', category=BiopythonWarning) + +def get_chain_from_pdb(pdb_path, chain_id='A'): + parser = PDBParser() + structure = parser.get_structure('X', pdb_path)[0] + for chain in structure: + if chain.id == chain_id: + # print(len(chain)) + return chain + return None + +def diff_ratio(str1, str2): + # Create a SequenceMatcher object + seq_matcher = difflib.SequenceMatcher(None, str1, str2) + + # Calculate the difference ratio + return seq_matcher.ratio() + +####################################### + +#RMSD and Tm + +####################################### +def align_chains(chain1, chain2): + reslist1 = [] + reslist2 = [] + for residue1,residue2 in zip(chain1.get_residues(),chain2.get_residues()): + if is_aa(residue1) and residue1.has_id('CA'): # at least have CA + reslist1.append(residue1) + reslist2.append(residue2) + return reslist1,reslist2 + +def get_rmsd(chain1, chain2): + # chain1 = get_chain_from_pdb(pdb1, chain_id1) + # chain2 = get_chain_from_pdb(pdb2, chain_id2) + if chain1 is None or chain2 is None: + return None + super_imposer = Superimposer() + pos1 = np.array([atom.get_coord() for atom in chain1.get_atoms() if atom.name == 'CA']) + pos2 = np.array([atom.get_coord() for atom in chain2.get_atoms() if atom.name == 'CA']) + rmsd1 = np.sqrt(np.sum((pos1 - pos2)**2) / len(pos1)) + super_imposer.set_atoms([atom for atom in chain1.get_atoms() if atom.name == 'CA'], + [atom for atom in chain2.get_atoms() if atom.name == 'CA']) + rmsd2 = super_imposer.rms + return rmsd1,rmsd2 + +def get_tm(chain1,chain2): + # 
chain1 = get_chain_from_pdb(pdb1, chain_id1) + # chain2 = get_chain_from_pdb(pdb2, chain_id2) + pos1 = np.array([atom.get_coord() for atom in chain1.get_atoms() if atom.name == 'CA']) + pos2 = np.array([atom.get_coord() for atom in chain2.get_atoms() if atom.name == 'CA']) + tm_results = tmtools.tm_align(pos1, pos2, 'A'*len(pos1), 'A'*len(pos2)) + # print(dir(tm_results)) + return tm_results.tm_norm_chain2 + +def get_traj_chain(pdb, chain): + parser = PDBParser() + structure = parser.get_structure('X', pdb)[0] + chain2id = {chain.id:i for i,chain in enumerate(structure)} + traj = md.load(pdb) + chain_indices = traj.topology.select(f"chainid {chain2id[chain]}") + traj = traj.atom_slice(chain_indices) + return traj + +def get_second_stru(pdb,chain): + parser = PDBParser() + structure = parser.get_structure('X', pdb)[0] + chain2id = {chain.id:i for i,chain in enumerate(structure)} + traj = md.load(pdb) + chain_indices = traj.topology.select(f"chainid {chain2id[chain]}") + traj = traj.atom_slice(chain_indices) + return md.compute_dssp(traj,simplified=True) + +def get_ss(traj1,traj2): + # traj1,traj2 = get_traj_chain(pdb1,chain_id1),get_traj_chain(pdb2,chain_id2) + ss1,ss2 = md.compute_dssp(traj1,simplified=True),md.compute_dssp(traj2,simplified=True) + return (ss1==ss2).mean() + +def get_bind_site(pdb,chain_id): + parser = PDBParser() + structure = parser.get_structure('X', pdb)[0] + peps = [atom for res in structure[chain_id] for atom in res if atom.get_name() == 'CA'] + recs = [atom for chain in structure if chain.get_id()!=chain_id for res in chain for atom in res if atom.get_name() == 'CA'] + # print(recs) + search = NeighborSearch(recs) + near_res = [] + for atom in peps: + near_res += search.search(atom.get_coord(), 10.0, level='R') + near_res = set([res.get_id()[1] for res in near_res]) + return near_res + +def get_bind_ratio(pdb1, pdb2, chain_id1, chain_id2): + near_res1,near_res2 = get_bind_site(pdb1,chain_id1),get_bind_site(pdb2,chain_id2) + # 
print(near_res1) + # print(near_res2) + return len(near_res1.intersection(near_res2))/(len(near_res2)+1e-10) # last one is gt + +def get_dihedral(pdb,chain): + traj = get_traj_chain(pdb,chain) + #TODO: dihedral + +def get_seq(pdb,chain_id): + parser = PDBParser() + chain = parser.get_structure('X', pdb)[0][chain_id] + return seq1("".join([residue.get_resname() for residue in chain])) # ignore is_aa,used for extract seq from genrated pdb + +def get_mpnn_seqs(path): + fastas = [] + for record in SeqIO.parse(path, "fasta"): + tmp = [c for c in str(record.seq)] + fastas.append(tmp) + return fastas + diff --git a/eval/run_esmfold.py b/eval/run_esmfold.py new file mode 100644 index 0000000000000000000000000000000000000000..fae39c2a90154a3a4978f2325ed5e17a6f590ab1 --- /dev/null +++ b/eval/run_esmfold.py @@ -0,0 +1,73 @@ +import os +import pandas as pd +import subprocess +import torch +import esm +import numpy as np +import shutil +from tqdm import tqdm + +from joblib import delayed, Parallel + +import warnings +from Bio import BiopythonWarning, SeqIO + +from geometry import * + +# 忽略PDBConstructionWarning +warnings.filterwarnings('ignore', category=BiopythonWarning) + +input_dir="./Data/Baselines_new/Tests" +output_dir="/datapool/data2/home/jiahan/ResProj/PepDiff/frame-flow/Data/Baselines_new/Codesign" + +model = esm.pretrained.esmfold_v1() +model = model.eval().to('cuda:2') + +def process_rf(name='1aze_B'): + input_dir=".Data/Baselines_new/Tests" + output_dir=".Data/Baselines_new/Codesign" + struct_dir = os.path.join(output_dir,name,'rfs_refold') + seq_dir = os.path.join(output_dir,name,'mpnns','seqs') + os.makedirs(struct_dir,exist_ok=True) + seqs = {} + for seq_path in os.listdir(seq_dir): + tmp_seqs = [] + if seq_path.endswith('.fasta'): + for record in SeqIO.parse(os.path.join(seq_dir,seq_path), "fasta"): + tmp_seqs.append(str(record.seq)) + seqs[seq_path.split('.')[0]] = tmp_seqs[-1] + for seq_name,seq in seqs.items(): + with torch.no_grad(): + output = 
model.infer_pdb(seq) + with open(os.path.join(struct_dir,seq_name+'.pdb'),'w') as f: + f.write(output) + +def process_pg(name='1aze_B',chain_id='A'): + input_dir=".Data/Baselines_new/Tests" + output_dir=".Data/Baselines_new/Codesign" + struct_dir = os.path.join(output_dir,name,'pgs_refold') + seq_dir = os.path.join(output_dir,name,'pgs') + os.makedirs(struct_dir,exist_ok=True) + seqs = {} + for seq_path in os.listdir(seq_dir): + if seq_path.endswith('.pdb'): + seqs[seq_path.split('.')[0]] = get_seq(os.path.join(seq_dir,seq_path),chain_id) + for seq_name,seq in seqs.items(): + with torch.no_grad(): + output = model.infer_pdb(seq) + with open(os.path.join(struct_dir,seq_name+'.pdb'),'w') as f: + f.write(output) + +def refold(name,chain_id,sub_dir): + raw_dir = os.path.join('/datapool/data2/home/jiahan/ResProj/PepDiff/frame-flow/Data/Models_new/Codesign',sub_dir,'pdbs') + refold_dir = os.path.join('/datapool/data2/home/jiahan/ResProj/PepDiff/frame-flow/Data/Models_new/Codesign',sub_dir,'pdbs_refold') + os.makedirs(os.path.join(refold_dir,name),exist_ok=True) + seqs = {} + for seq_path in os.listdir(os.path.join(raw_dir,name)): + if seq_path.endswith('.pdb'): + seqs[seq_path.split('.')[0]] = get_seq(os.path.join(raw_dir,name,seq_path),chain_id) + for seq_name,seq in seqs.items(): + with torch.no_grad(): + output = model.infer_pdb(seq) + with open(os.path.join(refold_dir,name,seq_name+'.pdb'),'w') as f: + f.write(output) \ No newline at end of file diff --git a/eval/run_esmif.py b/eval/run_esmif.py new file mode 100644 index 0000000000000000000000000000000000000000..9c6f8fc15f7d4845fc904b06b845462d8a9a51ea --- /dev/null +++ b/eval/run_esmif.py @@ -0,0 +1,33 @@ +from utils import * + +import os +import pandas as pd +import subprocess +import torch +import numpy as np +import shutil +from tqdm import tqdm + +from joblib import delayed, Parallel + +input_dir="./Baselines_new/Tests" +# output_dir="/datapool/data2/home/jiahan/Res Proj/PepDiff/frame-flow/Data/RF_samples" 
+output_dir="./Data/Baselines_new/Fixbb" + +RUNNER = "/datapool/data2/home/jiahan/Tool/esm/examples/inverse_folding/sample_sequences.py" + + +def process_one_item_esmif(name='1a1m_C',chains_to_design="A",num_samples=10,temperature=0.1): + if not os.path.exists(os.path.join(output_dir,name,'esms')): + os.makedirs(os.path.join(output_dir,name,'esms')) + assert os.path.exists(os.path.join(output_dir,name,'esms')) + # if not os.path.exists(os.path.join(output_dir,name,'pocket_merge_renum.pdb')): + # chain_dic = renumber_pdb(os.path.join(input_dir,name,'pocket_merge.pdb'),os.path.join(output_dir,name,'pocket_merge_renum.pdb')) + dirname = os.path.join(output_dir,name,'esms') + cmd = [ + "python", RUNNER, os.path.join(input_dir,name,'pocket_merge_renum.pdb'), + "--chain", chains_to_design, "--temperature", f"{temperature}", "--num-samples", f"{num_samples}", + "--outpath", os.path.join(dirname,'pocket_merge_renum.fasta'), + "--multichain-backbone", "--nogpu" +] + subprocess.run(cmd) diff --git a/eval/run_mpnn.py b/eval/run_mpnn.py new file mode 100644 index 0000000000000000000000000000000000000000..61000f16f05306b048a3e498a0e32a4beb1208bb --- /dev/null +++ b/eval/run_mpnn.py @@ -0,0 +1,146 @@ +from utils import * +from geometry import * + +import os +import pandas as pd +import subprocess +import torch +import numpy as np +import shutil +from tqdm import tqdm + +from joblib import delayed, Parallel + +from Bio.PDB import PDBParser, PDBIO, Select + + +HELPERS = "/datapool/data2/home/jiahan/Tool/ProteinMPNN/helper_scripts" +RUNNER = "/datapool/data2/home/jiahan/Tool/ProteinMPNN/protein_mpnn_run.py" + +def get_chain_nums(pdb_path,chain_id): + parser = PDBParser() + chain = parser.get_structure('X',pdb_path)[0][chain_id] + residue_nums = [residue.get_id()[1] for residue in chain] + return residue_nums + +def process_mpnn_bb(name='1aze_B',chains_to_design="A",num_samples=1): + input_dir = './Data/Models_new/Codesign/bb/pdbs' + output_dir = './Data/Models_new/Codesign/bb/seqs' 
+ if not os.path.exists(os.path.join(output_dir,name)): + os.makedirs(os.path.join(output_dir,name)) + dirname = os.path.join(output_dir,name) + # defined dirs + path_for_parsed_chains=os.path.join(dirname,'parsed_pdbs.jsonl') + path_for_assigned_chains=os.path.join(dirname,'assigned_pdbs.jsonl') + path_for_fixed_positions=os.path.join(dirname,'fixed_pdbs.jsonl') + residue_nums = get_chain_nums(os.path.join(input_dir,name,'gt.pdb'),chains_to_design) + design_only_positions = " ".join(map(str,residue_nums)) #design only these residues; use flag --specify_non_fixed + # print(path_for_assigned_chains) + # print(design_only_positions) + subprocess.run([ + "python", os.path.join(HELPERS,"parse_multiple_chains.py"), + "--input_path", os.path.join(input_dir,name), + "--output_path", path_for_parsed_chains, + ]) + subprocess.run([ + "python", os.path.join(HELPERS,"assign_fixed_chains.py"), + "--input_path", path_for_parsed_chains, + "--output_path", path_for_assigned_chains, + '--chain_list', chains_to_design, + ]) + subprocess.run([ + "python", os.path.join(HELPERS,"make_fixed_positions_dict.py"), + "--input_path", path_for_parsed_chains, + "--output_path", path_for_fixed_positions, + '--chain_list', chains_to_design, + '--position_list', design_only_positions, + '--specify_non_fixed' + ]) + # run mpnn + # print('run mpnns') + subprocess.run([ + "python", RUNNER, + "--jsonl_path", path_for_parsed_chains, + "--chain_id_jsonl", path_for_assigned_chains, + "--fixed_positions_jsonl", path_for_fixed_positions, + "--out_folder", dirname, + "--num_seq_per_target", f"{num_samples}", + "--sampling_temp", "0.1", + "--seed", "37", + "--batch_size","1", + '--device','cuda:1' + ]) + +def process_one_item_mpnn(name='1a1m_C',chains_to_design="A",num_samples=1): + input_dir="./Data/Baselines_new/Tests" + output_dir="./Data/Baselines_new/Codesign" + if not os.path.exists(os.path.join(output_dir,name,'mpnns')): + os.makedirs(os.path.join(output_dir,name,'mpnns')) + # if not 
os.path.exists(os.path.join(output_dir,name,'pocket_merge_renum.pdb')): + # chain_dic = renumber_pdb(os.path.join(input_dir,name,'pocket_merge.pdb'),os.path.join(output_dir,name,'pocket_merge_renum.pdb')) + dirname = os.path.join(output_dir,name,'mpnns') + # defined dirs + path_for_parsed_chains=os.path.join(dirname,'parsed_pdbs.jsonl') + path_for_assigned_chains=os.path.join(dirname,'assigned_pdbs.jsonl') + path_for_fixed_positions=os.path.join(dirname,'fixed_pdbs.jsonl') + with open(os.path.join(input_dir,name,'seq.fasta'),'r') as f: + pep_len = len(f.readlines()[1].strip()) + design_only_positions=" ".join(map(str,list(range(1,pep_len+1)))) #design only these residues; use flag --specify_non_fixed + # print(design_only_positions) + # parsed chains + # print("parsing chains") + subprocess.run([ + "python", os.path.join(HELPERS,"parse_multiple_chains.py"), + "--input_path", os.path.join('./Data/Baselines_new/Codesign',name,'rfs'),#os.path.join('/datapool/data2/home/jiahan/ResProj/PepDiff/frame-flow/Data/Baselines/Fixbb/',name), + "--output_path", path_for_parsed_chains, + ]) + subprocess.run([ + "python", os.path.join(HELPERS,"assign_fixed_chains.py"), + "--input_path", path_for_parsed_chains, + "--output_path", path_for_assigned_chains, + '--chain_list', chains_to_design, + ]) + subprocess.run([ + "python", os.path.join(HELPERS,"make_fixed_positions_dict.py"), + "--input_path", path_for_parsed_chains, + "--output_path", path_for_fixed_positions, + '--chain_list', chains_to_design, + '--position_list', design_only_positions, + '--specify_non_fixed' + ]) + # run mpnn + # print('run mpnns') + subprocess.run([ + "python", RUNNER, + "--jsonl_path", path_for_parsed_chains, + "--chain_id_jsonl", path_for_assigned_chains, + "--fixed_positions_jsonl", path_for_fixed_positions, + "--out_folder", dirname, + "--num_seq_per_target", f"{num_samples}", + "--sampling_temp", "0.1", + "--seed", "37", + "--batch_size","1", + '--device','cuda:1' + ]) + + +def 
write_seq_to_pdb(seq_path,pdb_path,out_path,chain_id): + # first we should fix GGGGG in rfs with mpnn generated seq + aa_mapping = {"A": "ALA","C": "CYS","D": "ASP","E": "GLU","F": "PHE","G": "GLY","H": "HIS","I": "ILE","K": "LYS","L": "LEU","M": "MET","N": "ASN","P": "PRO","Q": "GLN","R": "ARG","S": "SER","T": "THR","V": "VAL","W": "TRP","Y": "TYR", + 'X':'UNK'} + tmps = [] + for record in SeqIO.parse(seq_path, "fasta"): + tmps.append(str(record.seq)) + seq = tmps[-1] + + parser = PDBParser() + structure = parser.get_structure("X", pdb_path) + model = structure[0] + for chain in model: + if chain.id == chain_id: # 假设你要更改的是链A + for i,res in enumerate(chain): + if i(B,3)/(B,1)->(B,3) + center = center.unsqueeze(1) # (B,1,3) + # center = 0. it seems not center didnt influence the result, but its good for training stabilty + pos = pos - center + pos = pos * res_mask[...,None] + return pos,center + + def seq_to_simplex(self,seqs): + return clampped_one_hot(seqs, self.K).float() * self.k * 2 - self.k # (B,L,K) + + def forward(self, batch): + + num_batch, num_res = batch['aa'].shape + gen_mask,res_mask,angle_mask = batch['generate_mask'].long(),batch['res_mask'].long(),batch['torsion_angle_mask'].long() + + #encode + rotmats_1, trans_1, angles_1, seqs_1, node_embed, edge_embed = self.encode(batch) # no generate mask + + # prepare for denoise + trans_1_c,_ = self.zero_center_part(trans_1,gen_mask,res_mask) + trans_1_c = trans_1 # already centered when constructing dataset + seqs_1_simplex = self.seq_to_simplex(seqs_1) + seqs_1_prob = F.softmax(seqs_1_simplex,dim=-1) + + with torch.no_grad(): + t = torch.rand((num_batch,1), device=batch['aa'].device) + t = t*(1-2 * self._interpolant_cfg.t_normalization_clip) + self._interpolant_cfg.t_normalization_clip # avoid 0 + if self.sample_structure: + # corrupt trans + trans_0 = torch.randn((num_batch,num_res,3), device=batch['aa'].device) * self._interpolant_cfg.trans.sigma # scale with sigma? 
+ trans_0_c,_ = self.zero_center_part(trans_0,gen_mask,res_mask) + trans_t = (1-t[...,None])*trans_0_c + t[...,None]*trans_1_c + trans_t_c = torch.where(batch['generate_mask'][...,None],trans_t,trans_1_c) + # corrupt rotmats + rotmats_0 = uniform_so3(num_batch,num_res,device=batch['aa'].device) + rotmats_t = so3_utils.geodesic_t(t[..., None], rotmats_1, rotmats_0) + rotmats_t = torch.where(batch['generate_mask'][...,None,None],rotmats_t,rotmats_1) + # corrup angles + angles_0 = torus.tor_random_uniform(angles_1.shape, device=batch['aa'].device, dtype=angles_1.dtype) # (B,L,5) + angles_t = torus.tor_geodesic_t(t[..., None], angles_1, angles_0) + angles_t = torch.where(batch['generate_mask'][...,None],angles_t,angles_1) + else: + trans_t_c = trans_1_c.detach().clone() + rotmats_t = rotmats_1.detach().clone() + angles_t = angles_1.detach().clone() + if self.sample_sequence: + # corrupt seqs + seqs_0_simplex = self.k * torch.randn_like(seqs_1_simplex) # (B,L,K) + seqs_0_prob = F.softmax(seqs_0_simplex,dim=-1) # (B,L,K) + seqs_t_simplex = ((1 - t[..., None]) * seqs_0_simplex) + (t[..., None] * seqs_1_simplex) # (B,L,K) + seqs_t_simplex = torch.where(batch['generate_mask'][...,None],seqs_t_simplex,seqs_1_simplex) + seqs_t_prob = F.softmax(seqs_t_simplex,dim=-1) # (B,L,K) + seqs_t = sample_from(seqs_t_prob) # (B,L) + seqs_t = torch.where(batch['generate_mask'],seqs_t,seqs_1) + else: + seqs_t = seqs_1.detach().clone() + seqs_t_simplex = seqs_1_simplex.detach().clone() + seqs_t_prob = seqs_1_prob.detach().clone() + + # denoise + pred_rotmats_1, pred_trans_1, pred_angles_1, pred_seqs_1_prob = self.ga_encoder(t, rotmats_t, trans_t_c, angles_t, seqs_t, node_embed, edge_embed, gen_mask, res_mask) + pred_seqs_1 = sample_from(F.softmax(pred_seqs_1_prob,dim=-1)) + pred_seqs_1 = torch.where(batch['generate_mask'],pred_seqs_1,torch.clamp(seqs_1,0,19)) + pred_trans_1_c,_ = self.zero_center_part(pred_trans_1,gen_mask,res_mask) + pred_trans_1_c = pred_trans_1 # implicitly enforce zero 
center in gen_mask, in this way, we dont need to move receptor when sampling + + norm_scale = 1 / (1 - torch.min(t[...,None], torch.tensor(self._interpolant_cfg.t_normalization_clip))) # yim etal.trick, 1/1-t + + # trans vf loss + trans_loss = torch.sum((pred_trans_1_c - trans_1_c)**2*gen_mask[...,None],dim=(-1,-2)) / (torch.sum(gen_mask,dim=-1) + 1e-8) # (B,) + trans_loss = torch.mean(trans_loss) + + # rots vf loss + gt_rot_vf = so3_utils.calc_rot_vf(rotmats_t, rotmats_1) + pred_rot_vf = so3_utils.calc_rot_vf(rotmats_t, pred_rotmats_1) + rot_loss = torch.sum(((gt_rot_vf - pred_rot_vf) * norm_scale)**2*gen_mask[...,None],dim=(-1,-2)) / (torch.sum(gen_mask,dim=-1) + 1e-8) # (B,) + rot_loss = torch.mean(rot_loss) + + # bb aux loss + gt_bb_atoms = all_atom.to_atom37(trans_1_c, rotmats_1)[:, :, :3] + pred_bb_atoms = all_atom.to_atom37(pred_trans_1_c, pred_rotmats_1)[:, :, :3] + # gt_bb_atoms = all_atom.to_bb_atoms(trans_1_c, rotmats_1, angles_1[:,:,0]) # N,CA,C,O,CB + # pred_bb_atoms = all_atom.to_bb_atoms(pred_trans_1_c, pred_rotmats_1, pred_angles_1[:,:,0]) + # print(gt_bb_atoms.shape) + bb_atom_loss = torch.sum( + (gt_bb_atoms - pred_bb_atoms) ** 2 * gen_mask[..., None, None], + dim=(-1, -2, -3) + ) / (torch.sum(gen_mask,dim=-1) + 1e-8) # (B,) + bb_atom_loss = torch.mean(bb_atom_loss) + # bb_atom_loss = torch.mean(torch.where(t[:,0]>=0.75,bb_atom_loss,torch.zeros_like(bb_atom_loss))) # penalty for near gt point + + # seqs vf loss + seqs_loss = F.cross_entropy(pred_seqs_1_prob.view(-1,pred_seqs_1_prob.shape[-1]),torch.clamp(seqs_1,0,19).view(-1), reduction='none').view(pred_seqs_1_prob.shape[:-1]) # (N,L), not softmax + seqs_loss = torch.sum(seqs_loss * gen_mask, dim=-1) / (torch.sum(gen_mask,dim=-1) + 1e-8) + seqs_loss = torch.mean(seqs_loss) + + # we should not use angle mask, as you dont know aa type when generating + # angle_mask_loss = torch.cat([angle_mask,angle_mask],dim=-1) # (B,L,10) + # angle vf loss + angle_mask_loss = torsions_mask.to(batch['aa'].device) 
+ angle_mask_loss = angle_mask_loss[pred_seqs_1.reshape(-1)].reshape(num_batch,num_res,-1) # (B,L,5) + angle_mask_loss = torch.cat([angle_mask_loss,angle_mask_loss],dim=-1) # (B,L,10) + angle_mask_loss = torch.logical_and(batch['generate_mask'][...,None].bool(),angle_mask_loss) + gt_angle_vf = torus.tor_logmap(angles_t, angles_1) + gt_angle_vf_vec = torch.cat([torch.sin(gt_angle_vf),torch.cos(gt_angle_vf)],dim=-1) + pred_angle_vf = torus.tor_logmap(angles_t, pred_angles_1) + pred_angle_vf_vec = torch.cat([torch.sin(pred_angle_vf),torch.cos(pred_angle_vf)],dim=-1) + # angle_loss = torch.sum(((gt_angle_vf_vec - pred_angle_vf_vec) * norm_scale)**2*gen_mask[...,None],dim=(-1,-2)) / ((torch.sum(gen_mask,dim=-1)) + 1e-8) # (B,) + angle_loss = torch.sum(((gt_angle_vf_vec - pred_angle_vf_vec) * norm_scale)**2*angle_mask_loss,dim=(-1,-2)) / (torch.sum(angle_mask_loss,dim=(-1,-2)) + 1e-8) # (B,) + angle_loss = torch.mean(angle_loss) + + + # angle aux loss + angles_1_vec = torch.cat([torch.sin(angles_1),torch.cos(angles_1)],dim=-1) + pred_angles_1_vec = torch.cat([torch.sin(pred_angles_1),torch.cos(pred_angles_1)],dim=-1) + # torsion_loss = torch.sum((pred_angles_1_vec - angles_1_vec)**2*gen_mask[...,None],dim=(-1,-2)) / (torch.sum(gen_mask,dim=-1) + 1e-8) # (B,) + torsion_loss = torch.sum((pred_angles_1_vec - angles_1_vec)**2*angle_mask_loss,dim=(-1,-2)) / (torch.sum(angle_mask_loss,dim=(-1,-2)) + 1e-8) # (B,) + torsion_loss = torch.mean(torsion_loss) + + return { + "trans_loss": trans_loss, + 'rot_loss': rot_loss, + 'bb_atom_loss': bb_atom_loss, + 'seqs_loss': seqs_loss, + 'angle_loss': angle_loss, + 'torsion_loss': torsion_loss, + } + + @torch.no_grad() + def sample(self, batch, num_steps = 100, sample_bb=True, sample_ang=True, sample_seq=True): + + num_batch, num_res = batch['aa'].shape + gen_mask,res_mask = batch['generate_mask'],batch['res_mask'] + K = self._interpolant_cfg.seqs.num_classes + k = self._interpolant_cfg.seqs.simplex_value + angle_mask_loss = 
torsions_mask.to(batch['aa'].device) + + #encode + rotmats_1, trans_1, angles_1, seqs_1, node_embed, edge_embed = self.encode(batch) + # trans_1_c,center = self.zero_center_part(trans_1,gen_mask,res_mask) + trans_1_c = trans_1 + seqs_1_simplex = self.seq_to_simplex(seqs_1) + seqs_1_prob = F.softmax(seqs_1_simplex,dim=-1) + + # # # only sample bb, angle and seq with noise + # angles_1 = torch.where(batch['generate_mask'][...,None],angles_1,torus.tor_random_uniform(angles_1.shape, device=batch['aa'].device, dtype=angles_1.dtype)) + # seqs_1 = torch.where(batch['generate_mask'],seqs_1,torch.randint_like(seqs_1,0,20)) + # seqs_1_simplex = self.seq_to_simplex(seqs_1) + # seqs_1_prob = F.softmax(seqs_1_simplex,dim=-1) + + #initial noise + if sample_bb: + rotmats_0 = uniform_so3(num_batch,num_res,device=batch['aa'].device) + rotmats_0 = torch.where(batch['generate_mask'][...,None,None],rotmats_0,rotmats_1) + trans_0 = torch.randn((num_batch,num_res,3), device=batch['aa'].device) # scale with sigma? 
+ # move center and receptor + trans_0_c,center = self.zero_center_part(trans_0,gen_mask,res_mask) + trans_0_c = torch.where(batch['generate_mask'][...,None],trans_0_c,trans_1_c) + else: + rotmats_0 = rotmats_1.detach().clone() + trans_0_c = trans_1_c.detach().clone() + if sample_ang: + # angle noise + angles_0 = torus.tor_random_uniform(angles_1.shape, device=batch['aa'].device, dtype=angles_1.dtype) # (B,L,5) + angles_0 = torch.where(batch['generate_mask'][...,None],angles_0,angles_1) + else: + angles_0 = angles_1.detach().clone() + if sample_seq: + seqs_0_simplex = k * torch.randn((num_batch,num_res,K), device=batch['aa'].device) + seqs_0_prob = F.softmax(seqs_0_simplex,dim=-1) + seqs_0 = sample_from(seqs_0_prob) + seqs_0 = torch.where(batch['generate_mask'],seqs_0,seqs_1) + seqs_0_simplex = torch.where(batch['generate_mask'][...,None],seqs_0_simplex,seqs_1_simplex) + else: + seqs_0 = seqs_1.detach().clone() + seqs_0_prob = seqs_1_prob.detach().clone() + seqs_0_simplex = seqs_1_simplex.detach().clone() + + # Set-up time + ts = torch.linspace(1.e-2, 1.0, num_steps) + t_1 = ts[0] + # prot_traj = [{'rotmats':rotmats_0,'trans':trans_0_c,'seqs':seqs_0,'seqs_simplex':seqs_0_simplex,'rotmats_1':rotmats_1,'trans_1':trans_1-center,'seqs_1':seqs_1}] + clean_traj = [] + rotmats_t_1, trans_t_1_c, angles_t_1, seqs_t_1, seqs_t_1_simplex = rotmats_0, trans_0_c, angles_0, seqs_0, seqs_0_simplex + + # denoise loop + for t_2 in ts[1:]: + t = torch.ones((num_batch, 1), device=batch['aa'].device) * t_1 + # rots + pred_rotmats_1, pred_trans_1, pred_angles_1, pred_seqs_1_prob = self.ga_encoder(t, rotmats_t_1, trans_t_1_c, angles_t_1, seqs_t_1, node_embed, edge_embed, batch['generate_mask'].long(), batch['res_mask'].long()) + pred_rotmats_1 = torch.where(batch['generate_mask'][...,None,None],pred_rotmats_1,rotmats_1) + # trans, move center + # pred_trans_1_c,center = self.zero_center_part(pred_trans_1,gen_mask,res_mask) + pred_trans_1_c = 
torch.where(batch['generate_mask'][...,None],pred_trans_1,trans_1_c) # move receptor also + # angles + pred_angles_1 = torch.where(batch['generate_mask'][...,None],pred_angles_1,angles_1) + # seqs + pred_seqs_1 = sample_from(F.softmax(pred_seqs_1_prob,dim=-1)) + pred_seqs_1 = torch.where(batch['generate_mask'],pred_seqs_1,seqs_1) + pred_seqs_1_simplex = self.seq_to_simplex(pred_seqs_1) + # seq-angle + torsion_mask = angle_mask_loss[pred_seqs_1.reshape(-1)].reshape(num_batch,num_res,-1) # (B,L,5) + pred_angles_1 = torch.where(torsion_mask.bool(),pred_angles_1,torch.zeros_like(pred_angles_1)) + if not sample_bb: + pred_trans_1_c = trans_1_c.detach().clone() + # _,center = self.zero_center_part(trans_1,gen_mask,res_mask) + pred_rotmats_1 = rotmats_1.detach().clone() + if not sample_ang: + pred_angles_1 = angles_1.detach().clone() + if not sample_seq: + pred_seqs_1 = seqs_1.detach().clone() + pred_seqs_1_simplex = seqs_1_simplex.detach().clone() + clean_traj.append({'rotmats':pred_rotmats_1.cpu(),'trans':pred_trans_1_c.cpu(),'angles':pred_angles_1.cpu(),'seqs':pred_seqs_1.cpu(),'seqs_simplex':pred_seqs_1_simplex.cpu(), + 'rotmats_1':rotmats_1.cpu(),'trans_1':trans_1_c.cpu(),'angles_1':angles_1.cpu(),'seqs_1':seqs_1.cpu()}) + # reverse step, also only for gen mask region + d_t = (t_2-t_1) * torch.ones((num_batch, 1), device=batch['aa'].device) + # Euler step + trans_t_2 = trans_t_1_c + (pred_trans_1_c-trans_0_c)*d_t[...,None] + # trans_t_2_c,center = self.zero_center_part(trans_t_2,gen_mask,res_mask) + trans_t_2_c = torch.where(batch['generate_mask'][...,None],trans_t_2,trans_1_c) # move receptor also + # rotmats_t_2 = so3_utils.geodesic_t(d_t[...,None] / (1-t[...,None]), pred_rotmats_1, rotmats_t_1) + rotmats_t_2 = so3_utils.geodesic_t(d_t[...,None] * 10, pred_rotmats_1, rotmats_t_1) + rotmats_t_2 = torch.where(batch['generate_mask'][...,None,None],rotmats_t_2,rotmats_1) + # angles + angles_t_2 = torus.tor_geodesic_t(d_t[...,None],pred_angles_1, angles_t_1) + 
angles_t_2 = torch.where(batch['generate_mask'][...,None],angles_t_2,angles_1) + # seqs + seqs_t_2_simplex = seqs_t_1_simplex + (pred_seqs_1_simplex - seqs_0_simplex) * d_t[...,None] + seqs_t_2 = sample_from(F.softmax(seqs_t_2_simplex,dim=-1)) + seqs_t_2 = torch.where(batch['generate_mask'],seqs_t_2,seqs_1) + # seq-angle + torsion_mask = angle_mask_loss[seqs_t_2.reshape(-1)].reshape(num_batch,num_res,-1) # (B,L,5) + angles_t_2 = torch.where(torsion_mask.bool(),angles_t_2,torch.zeros_like(angles_t_2)) + + if not sample_bb: + trans_t_2_c = trans_1_c.detach().clone() + rotmats_t_2 = rotmats_1.detach().clone() + if not sample_ang: + angles_t_2 = angles_1.detach().clone() + if not sample_seq: + seqs_t_2 = seqs_1.detach().clone() + rotmats_t_1, trans_t_1_c, angles_t_1, seqs_t_1, seqs_t_1_simplex = rotmats_t_2, trans_t_2_c, angles_t_2, seqs_t_2, seqs_t_2_simplex + t_1 = t_2 + + # final step + t_1 = ts[-1] + t = torch.ones((num_batch, 1), device=batch['aa'].device) * t_1 + pred_rotmats_1, pred_trans_1, pred_angles_1, pred_seqs_1_prob = self.ga_encoder(t, rotmats_t_1, trans_t_1_c, angles_t_1, seqs_t_1, node_embed, edge_embed, batch['generate_mask'].long(), batch['res_mask'].long()) + pred_rotmats_1 = torch.where(batch['generate_mask'][...,None,None],pred_rotmats_1,rotmats_1) + # move center + # pred_trans_1_c,center = self.zero_center_part(pred_trans_1,gen_mask,res_mask) + pred_trans_1_c = torch.where(batch['generate_mask'][...,None],pred_trans_1,trans_1_c) # move receptor also + # angles + pred_angles_1 = torch.where(batch['generate_mask'][...,None],pred_angles_1,angles_1) + # seqs + pred_seqs_1 = sample_from(F.softmax(pred_seqs_1_prob,dim=-1)) + pred_seqs_1 = torch.where(batch['generate_mask'],pred_seqs_1,seqs_1) + pred_seqs_1_simplex = self.seq_to_simplex(pred_seqs_1) + # seq-angle + torsion_mask = angle_mask_loss[pred_seqs_1.reshape(-1)].reshape(num_batch,num_res,-1) # (B,L,5) + pred_angles_1 = 
torch.where(torsion_mask.bool(),pred_angles_1,torch.zeros_like(pred_angles_1)) + if not sample_bb: + pred_trans_1_c = trans_1_c.detach().clone() + # _,center = self.zero_center_part(trans_1,gen_mask,res_mask) + pred_rotmats_1 = rotmats_1.detach().clone() + if not sample_ang: + pred_angles_1 = angles_1.detach().clone() + if not sample_seq: + pred_seqs_1 = seqs_1.detach().clone() + pred_seqs_1_simplex = seqs_1_simplex.detach().clone() + clean_traj.append({'rotmats':pred_rotmats_1.cpu(),'trans':pred_trans_1_c.cpu(),'angles':pred_angles_1.cpu(),'seqs':pred_seqs_1.cpu(),'seqs_simplex':pred_seqs_1_simplex.cpu(), + 'rotmats_1':rotmats_1.cpu(),'trans_1':trans_1_c.cpu(),'angles_1':angles_1.cpu(),'seqs_1':seqs_1.cpu()}) + + return clean_traj + + +# if __name__ == '__main__': +# prefix_dir = './pepflowww' +# # config,cfg_name = load_config("../configs/angle/learn_sc.yaml") +# config,cfg_name = load_config(os.path.join(prefix_dir,"configs/angle/learn_sc.yaml")) +# # print(config) +# device = 'cuda:0' +# dataset = PepDataset(structure_dir = config.dataset.val.structure_dir, dataset_dir = config.dataset.val.dataset_dir, +# name = config.dataset.val.name, transform=None, reset=config.dataset.val.reset) +# dataloader = DataLoader(dataset, batch_size=1, shuffle=False, collate_fn=PaddingCollate(eight=False), num_workers=4, pin_memory=True) +# ckpt = torch.load("./checkpoints/600000.pt", map_location=device) +# seed_all(114514) +# model = FlowModel(config.model).to(device) +# model.load_state_dict(process_dic(ckpt['model'])) +# model.eval() + +# # print(model) + +# # print(dataset[0]['chain_id']) +# # print(dataset[0]['id']) +# # print(dataset[0]['resseq']) +# # print(dataset[0]['res_nb']) +# # print(dataset[0]['icode']) + +# dic = {'id':[],'len':[],'tran':[],'aar':[],'rot':[],'trans_loss':[],'rot_loss':[]} + +# # for batch in tqdm(dataloader): +# # batch = recursive_to(batch,device) +# for i in tqdm(range(len(dataset))): +# item = dataset[i] +# data_list = [deepcopy(item) for _ in 
range(16)] +# batch = recursive_to(collate_fn(data_list),device) +# loss_dic = model(batch) +# # traj_1 = model.sample(batch,num_steps=50,sample_bb=False,sample_ang=True,sample_seq=False) +# traj_1 = model.sample(batch,num_steps=50,sample_bb=True,sample_ang=True,sample_seq=True) +# ca_dist = torch.sqrt(torch.sum((traj_1[-1]['trans']-traj_1[-1]['trans_1'])**2*batch['generate_mask'][...,None].cpu().long()) / (torch.sum(batch['generate_mask']) + 1e-8).cpu()) # rmsd +# rot_dist = torch.sqrt(torch.sum((traj_1[-1]['rotmats']-traj_1[-1]['rotmats_1'])**2*batch['generate_mask'][...,None,None].long().cpu()) / (torch.sum(batch['generate_mask']) + 1e-8).cpu()) # rmsd +# aar = torch.sum((traj_1[-1]['seqs']==traj_1[-1]['seqs_1']) * batch['generate_mask'].long().cpu()) / (torch.sum(batch['generate_mask']).cpu() + 1e-8) + + +# print(loss_dic) +# print(f'tran:{ca_dist},rot:{rot_dist},aar:{aar},len:{batch["generate_mask"].sum().item()}') + +# # free +# torch.cuda.empty_cache() +# gc.collect() + +# # dic['tran'].append(ca_dist.item()) +# # dic['rot'].append(rot_dist.item()) +# dic['aar'].append(aar.item()) +# dic['trans_loss'].append(loss_dic['trans_loss'].item()) +# dic['rot_loss'].append(loss_dic['rot_loss'].item()) +# dic['id'].append(batch['id'][0]) +# dic['len'].append(batch['generate_mask'].sum().item()) +# # # break + +# # traj_1[-1]['batch'] = batch +# # torch.save(traj_1[-1],f'/datapool/data2/home/jiahan/ResProj/PepDiff/frame-flow/Data/Models_new/Pack_new/outputs/{batch["id"][0]}.pt') + +# # print(dic) +# # dic = pd.DataFrame(dic) +# # dic.to_csv(f'/datapool/data2/home/jiahan/ResProj/PepDiff/frame-flow/Data/Models_new/Pack/outputs.csv',index=None) + +# print(np.mean(dic['aar'])) +# print(np.mean(dic['trans_loss'])) + +# if __name__ == '__main__': +# config,cfg_name = load_config("./configs/angle/learn_angle.yaml") +# seed_all(114514) +# device = 'cpu' +# dataset = PepDataset(structure_dir = config.dataset.train.structure_dir, dataset_dir = config.dataset.train.dataset_dir, 
+# name = config.dataset.train.name, transform=None, reset=config.dataset.train.reset) +# dataloader = DataLoader(dataset, batch_size=2, shuffle=False, collate_fn=PaddingCollate(), num_workers=4, pin_memory=True) +# model = FlowModel(config.model).to(device) +# optimizer = torch.optim.Adam(model.parameters(),lr=1.e-4) + +# # ckpt = torch.load('./checkpoints/90000.pt',map_location=device) +# # model.load_state_dict(process_dic(ckpt['model'])) +# # optimizer.load_state_dict(ckpt['optimizer']) + + +# # torch.autograd.set_detect_anomaly(True) +# for i,batch in tqdm(enumerate(dataloader)): +# batch = recursive_to(batch,device) +# loss_dict = model(batch) +# loss = sum_weighted_losses(loss_dict, config.train.loss_weights) +# # if torch.isnan(loss): +# # print(i) +# # print(batch['id']) + +# loss.backward() +# orig_grad_norm = clip_grad_norm_(model.parameters(), config.train.max_grad_norm) + +# print(f'{loss_dict},{loss},{orig_grad_norm}') + +# optimizer.step() +# optimizer.zero_grad() \ No newline at end of file diff --git a/models_con/ga.py b/models_con/ga.py new file mode 100644 index 0000000000000000000000000000000000000000..90dd5aa4ba57b029dee7543d07132624e799d21f --- /dev/null +++ b/models_con/ga.py @@ -0,0 +1,127 @@ +import torch +from torch import nn + +from models_con import ipa_pytorch as ipa_pytorch +from data import utils as du + +from models_con.utils import get_index_embedding, get_time_embedding + +from pepflow.modules.protein.constants import ANG_TO_NM_SCALE, NM_TO_ANG_SCALE +from pepflow.modules.common.layers import AngularEncoding + +import math + + +class GAEncoder(nn.Module): + def __init__(self, ipa_conf): + super().__init__() + self._ipa_conf = ipa_conf + + # angles + self.angles_embedder = AngularEncoding(num_funcs=12) # 25*5=120, for competitive embedding size + self.angle_net = nn.Sequential( + nn.Linear(self._ipa_conf.c_s, self._ipa_conf.c_s),nn.ReLU(), + nn.Linear(self._ipa_conf.c_s, self._ipa_conf.c_s),nn.ReLU(), + nn.Linear(self._ipa_conf.c_s, 
5) + # nn.Linear(self._ipa_conf.c_s, 22) + ) + + # for condition on current seq + self.current_seq_embedder = nn.Embedding(22, self._ipa_conf.c_s) + self.seq_net = nn.Sequential( + nn.Linear(self._ipa_conf.c_s, self._ipa_conf.c_s),nn.ReLU(), + nn.Linear(self._ipa_conf.c_s, self._ipa_conf.c_s),nn.ReLU(), + nn.Linear(self._ipa_conf.c_s, 20) + # nn.Linear(self._ipa_conf.c_s, 22) + ) + + # mixer + self.res_feat_mixer = nn.Sequential( + nn.Linear(3 * self._ipa_conf.c_s + self.angles_embedder.get_out_dim(in_dim=5), self._ipa_conf.c_s), + nn.ReLU(), + nn.Linear(self._ipa_conf.c_s, self._ipa_conf.c_s), + ) + + self.feat_dim = self._ipa_conf.c_s + + # Attention trunk + self.trunk = nn.ModuleDict() + for b in range(self._ipa_conf.num_blocks): + self.trunk[f'ipa_{b}'] = ipa_pytorch.InvariantPointAttention(self._ipa_conf) + self.trunk[f'ipa_ln_{b}'] = nn.LayerNorm(self._ipa_conf.c_s) + tfmr_in = self._ipa_conf.c_s + tfmr_layer = torch.nn.TransformerEncoderLayer( + d_model=tfmr_in, + nhead=self._ipa_conf.seq_tfmr_num_heads, + dim_feedforward=tfmr_in, + batch_first=True, + dropout=0.0, + norm_first=False + ) + self.trunk[f'seq_tfmr_{b}'] = torch.nn.TransformerEncoder( + tfmr_layer, self._ipa_conf.seq_tfmr_num_layers, enable_nested_tensor=False) + self.trunk[f'post_tfmr_{b}'] = ipa_pytorch.Linear( + tfmr_in, self._ipa_conf.c_s, init="final") + self.trunk[f'node_transition_{b}'] = ipa_pytorch.StructureModuleTransition( + c=self._ipa_conf.c_s) + self.trunk[f'bb_update_{b}'] = ipa_pytorch.BackboneUpdate( + self._ipa_conf.c_s, use_rot_updates=True) + + if b < self._ipa_conf.num_blocks-1: + # No edge update on the last block. 
+ edge_in = self._ipa_conf.c_z + self.trunk[f'edge_transition_{b}'] = ipa_pytorch.EdgeTransition( + node_embed_size=self._ipa_conf.c_s, + edge_embed_in=edge_in, + edge_embed_out=self._ipa_conf.c_z, + ) + + def embed_t(self, timesteps, mask): + timestep_emb = get_time_embedding( + timesteps[:, 0], + self.feat_dim, + max_positions=2056 + )[:, None, :].repeat(1, mask.shape[1], 1) + return timestep_emb + + def forward(self, t, rotmats_t, trans_t, angles_t, seqs_t, node_embed, edge_embed, generate_mask, res_mask): + num_batch, num_res = seqs_t.shape + + # incorperate current seq and timesteps + node_mask = res_mask + edge_mask = node_mask[:, None] * node_mask[:, :, None] + + node_embed = self.res_feat_mixer(torch.cat([node_embed, self.current_seq_embedder(seqs_t), self.embed_t(t,node_mask), self.angles_embedder(angles_t).reshape(num_batch,num_res,-1)],dim=-1)) + node_embed = node_embed * node_mask[..., None] + curr_rigids = du.create_rigid(rotmats_t, trans_t) + for b in range(self._ipa_conf.num_blocks): + ipa_embed = self.trunk[f'ipa_{b}']( + node_embed, + edge_embed, + curr_rigids, + node_mask) + ipa_embed *= node_mask[..., None] + node_embed = self.trunk[f'ipa_ln_{b}'](node_embed + ipa_embed) + seq_tfmr_out = self.trunk[f'seq_tfmr_{b}']( + node_embed, src_key_padding_mask=(1 - node_mask).bool()) + node_embed = node_embed + self.trunk[f'post_tfmr_{b}'](seq_tfmr_out) + node_embed = self.trunk[f'node_transition_{b}'](node_embed) + node_embed = node_embed * node_mask[..., None] + rigid_update = self.trunk[f'bb_update_{b}']( + node_embed * node_mask[..., None]) + curr_rigids = curr_rigids.compose_q_update_vec( + rigid_update, node_mask[..., None]) + + if b < self._ipa_conf.num_blocks-1: + edge_embed = self.trunk[f'edge_transition_{b}']( + node_embed, edge_embed) + edge_embed *= edge_mask[..., None] + + # curr_rigids = self.rigids_nm_to_ang(curr_rigids) + pred_trans1 = curr_rigids.get_trans() + pred_rotmats1 = curr_rigids.get_rots().get_rot_mats() + pred_seqs1_prob = 
def _parse_bool(value):
    """Turn a command-line token into a real boolean.

    ``argparse`` with ``type=bool`` calls ``bool("False")``, which is ``True``
    for every non-empty string, so the sampling switches could never be
    disabled from the command line.

    Args:
        value: The raw token (or an actual ``bool`` when used as a default).

    Returns:
        The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if token in ('0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


def parse_args(argv=None):
    """Parse the command-line options of the inference script.

    Args:
        argv: Optional list of tokens; ``None`` means ``sys.argv[1:]``.

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str)
    parser.add_argument('--device', type=str)
    parser.add_argument('--ckpt', type=str)
    parser.add_argument('--output', type=str)
    parser.add_argument('--num_steps', type=int, default=200)
    # Registered exactly once: adding the same option string again makes
    # argparse raise a "conflicting option string" error at start-up.
    parser.add_argument('--num_samples', type=int, default=64)
    parser.add_argument('--sample_bb', type=_parse_bool, default=True)
    parser.add_argument('--sample_ang', type=_parse_bool, default=True)
    parser.add_argument('--sample_seq', type=_parse_bool, default=True)
    return parser.parse_args(argv)


def main(argv=None):
    """Sample every validation complex and record per-complex metrics.

    For each dataset item the item is replicated ``--num_samples`` times, the
    training losses are evaluated, a trajectory is sampled, and the last
    trajectory state is saved to ``<output>/outputs/<id>.pt``.  A summary
    table is written to ``<output>/outputs.csv``.

    Args:
        argv: Optional list of command-line tokens (see ``parse_args``).
    """
    args = parse_args(argv)

    config, _ = load_config(args.config)
    device = args.device
    dataset = PepDataset(
        structure_dir=config.dataset.val.structure_dir,
        dataset_dir=config.dataset.val.dataset_dir,
        name=config.dataset.val.name,
        transform=None,
        reset=config.dataset.val.reset,
    )
    ckpt = torch.load(args.ckpt, map_location=device)

    seed_all(114514)
    model = FlowModel(config.model).to(device)
    model.load_state_dict(process_dic(ckpt['model']))
    model.eval()

    # torch.save does not create missing directories.
    sample_dir = os.path.join(args.output, 'outputs')
    os.makedirs(sample_dir, exist_ok=True)

    dic = {'id': [], 'len': [], 'tran': [], 'aar': [], 'rot': [], 'trans_loss': [], 'rot_loss': []}

    for i in tqdm(range(len(dataset))):
        item = dataset[i]
        data_list = [deepcopy(item) for _ in range(args.num_samples)]
        batch = recursive_to(collate_fn(data_list), device)
        loss_dic = model(batch)
        traj_1 = model.sample(
            batch,
            num_steps=args.num_steps,
            sample_bb=args.sample_bb,
            sample_ang=args.sample_ang,
            sample_seq=args.sample_seq,
        )
        final = traj_1[-1]
        gen_mask = batch['generate_mask']
        num_generated = torch.sum(gen_mask)

        # Root-mean-square deviations restricted to the generated residues.
        ca_dist = torch.sqrt(
            torch.sum((final['trans'] - final['trans_1']) ** 2 * gen_mask[..., None].cpu().long())
            / (num_generated + 1e-8).cpu()
        )
        rot_dist = torch.sqrt(
            torch.sum((final['rotmats'] - final['rotmats_1']) ** 2 * gen_mask[..., None, None].long().cpu())
            / (num_generated + 1e-8).cpu()
        )
        # Fraction of generated positions whose sampled residue matches seqs_1.
        aar = torch.sum((final['seqs'] == final['seqs_1']) * gen_mask.long().cpu()) / (num_generated.cpu() + 1e-8)

        print(loss_dic)
        print(f'tran:{ca_dist},rot:{rot_dist},aar:{aar},len:{batch["generate_mask"].sum().item()}')

        # free
        torch.cuda.empty_cache()
        gc.collect()

        dic['tran'].append(ca_dist.item())
        dic['rot'].append(rot_dist.item())
        dic['aar'].append(aar.item())
        dic['trans_loss'].append(loss_dic['trans_loss'].item())
        dic['rot_loss'].append(loss_dic['rot_loss'].item())
        dic['id'].append(batch['id'][0])
        dic['len'].append(batch['generate_mask'].sum().item())

        final['batch'] = batch
        torch.save(final, os.path.join(sample_dir, f'{batch["id"][0]}.pt'))

    pd.DataFrame(dic).to_csv(os.path.join(args.output, 'outputs.csv'), index=None)


if __name__ == '__main__':
    main()
def permute_final_dims(tensor: torch.Tensor, inds: List[int]):
    """Permute the last ``len(inds)`` dimensions of ``tensor``.

    Args:
        tensor: Input tensor.
        inds: Permutation of ``range(len(inds))`` applied to the trailing
            dimensions; leading dimensions keep their order.

    Returns:
        The permuted view of ``tensor``.
    """
    zero_index = -1 * len(inds)
    first_inds = list(range(len(tensor.shape[:zero_index])))
    return tensor.permute(first_inds + [zero_index + i for i in inds])


def flatten_final_dims(t: torch.Tensor, no_dims: int):
    """Collapse the last ``no_dims`` dimensions of ``t`` into a single one."""
    return t.reshape(t.shape[:-no_dims] + (-1,))


def ipa_point_weights_init_(weights):
    """Fill ``weights`` in place with softplus^-1(1), so softplus(w) == 1."""
    with torch.no_grad():
        softplus_inverse_1 = 0.541324854612918
        weights.fill_(softplus_inverse_1)


def _prod(nums):
    """Return the product of ``nums`` (1 for an empty iterable)."""
    return math.prod(nums)


def _calculate_fan(linear_weight_shape, fan="fan_in"):
    """Return the fan of a 2-D ``(fan_out, fan_in)`` weight shape.

    Args:
        linear_weight_shape: Shape of a linear weight, ``(fan_out, fan_in)``.
        fan: One of ``"fan_in"``, ``"fan_out"`` or ``"fan_avg"``.

    Raises:
        ValueError: If ``fan`` is not one of the supported options.
    """
    fan_out, fan_in = linear_weight_shape

    if fan == "fan_in":
        f = fan_in
    elif fan == "fan_out":
        f = fan_out
    elif fan == "fan_avg":
        f = (fan_in + fan_out) / 2
    else:
        raise ValueError("Invalid fan option")

    return f


def trunc_normal_init_(weights, scale=1.0, fan="fan_in"):
    """Initialise ``weights`` in place from a normal truncated at +/- 2 sigma.

    The variance is ``scale / max(1, fan)``; samples are drawn with SciPy and
    copied into ``weights``.

    Args:
        weights: 2-D weight tensor, modified in place.
        scale: Variance scale before division by the fan.
        fan: Fan mode forwarded to ``_calculate_fan``.
    """
    shape = weights.shape
    f = _calculate_fan(shape, fan)
    scale = scale / max(1, f)
    a = -2
    b = 2
    std = math.sqrt(scale) / truncnorm.std(a=a, b=b, loc=0, scale=1)
    size = _prod(shape)
    samples = truncnorm.rvs(a=a, b=b, loc=0, scale=std, size=size)
    samples = np.reshape(samples, shape)
    with torch.no_grad():
        # SciPy returns float64.  Convert to the parameter dtype before the
        # tensor reaches the target device: a float64 intermediate is
        # wasteful and cannot be created on backends without float64.
        weights.copy_(
            torch.tensor(samples, dtype=weights.dtype, device=weights.device)
        )


def lecun_normal_init_(weights):
    """LeCun (fan-in, scale 1) truncated-normal initialisation."""
    trunc_normal_init_(weights, scale=1.0)


def he_normal_init_(weights):
    """He (fan-in, scale 2) truncated-normal initialisation."""
    trunc_normal_init_(weights, scale=2.0)


def glorot_uniform_init_(weights):
    """Glorot/Xavier uniform initialisation with gain 1."""
    nn.init.xavier_uniform_(weights, gain=1)


def final_init_(weights):
    """Zero-fill ``weights`` (used for the last layer of a residual branch)."""
    with torch.no_grad():
        weights.fill_(0.0)


def gating_init_(weights):
    """Zero-fill ``weights`` (the matching bias is set to 1 by ``Linear``)."""
    with torch.no_grad():
        weights.fill_(0.0)


def normal_init_(weights):
    """Kaiming-normal initialisation with the ``"linear"`` nonlinearity."""
    torch.nn.init.kaiming_normal_(weights, nonlinearity="linear")


def compute_angles(ca_pos, pts):
    """Compute angles between C-alpha displacement vectors and IPA points.

    Args:
        ca_pos: C-alpha positions, indexed as ``[batch, res, 3]``.
        pts: Points of shape ``[batch, res, heads, points, 3]``.

    Returns:
        Tensor of shape ``[batch, res, res, heads, points]`` produced by
        ``all_atom.calculate_neighbor_angles``.
    """
    batch_size, num_res, num_heads, num_pts, _ = pts.shape
    # The small offset keeps the zero self-displacement from being exactly 0.
    calpha_vecs = (ca_pos[:, :, None, :] - ca_pos[:, None, :, :]) + 1e-10
    calpha_vecs = torch.tile(calpha_vecs[:, :, :, None, None, :], (1, 1, 1, num_heads, num_pts, 1))
    ipa_pts = pts[:, :, None, :, :, :] - torch.tile(ca_pos[:, :, None, None, None, :], (1, 1, num_res, num_heads, num_pts, 1))
    phi_angles = all_atom.calculate_neighbor_angles(
        calpha_vecs.reshape(-1, 3),
        ipa_pts.reshape(-1, 3)
    ).reshape(batch_size, num_res, num_res, num_heads, num_pts)
    return phi_angles


class Linear(nn.Linear):
    """
    A Linear layer with built-in nonstandard initializations. Called just
    like torch.nn.Linear.

    Implements the initializers in 1.11.4, plus some additional ones found
    in the code.
    """

    def __init__(
        self,
        in_dim: int,
        out_dim: int,
        bias: bool = True,
        init: str = "default",
        init_fn: Optional[Callable[[torch.Tensor, torch.Tensor], None]] = None,
    ):
        """
        Args:
            in_dim:
                The final dimension of inputs to the layer
            out_dim:
                The final dimension of layer outputs
            bias:
                Whether to learn an additive bias. True by default
            init:
                The initializer to use. Choose from:

                "default": LeCun fan-in truncated normal initialization
                "relu": He initialization w/ truncated normal distribution
                "glorot": Fan-average Glorot uniform initialization
                "gating": Weights=0, Bias=1
                "normal": Normal initialization with std=1/sqrt(fan_in)
                "final": Weights=0, Bias=0

                Overridden by init_fn if the latter is not None.
            init_fn:
                A custom initializer taking weight and bias as inputs.
                Overrides init if not None.

        Raises:
            ValueError: If ``init`` is not one of the names above and no
                ``init_fn`` is given.
        """
        super(Linear, self).__init__(in_dim, out_dim, bias=bias)

        # The bias starts at zero for every scheme; "gating" overrides it.
        if bias:
            with torch.no_grad():
                self.bias.fill_(0)

        if init_fn is not None:
            init_fn(self.weight, self.bias)
        else:
            if init == "default":
                lecun_normal_init_(self.weight)
            elif init == "relu":
                he_normal_init_(self.weight)
            elif init == "glorot":
                glorot_uniform_init_(self.weight)
            elif init == "gating":
                gating_init_(self.weight)
                if bias:
                    with torch.no_grad():
                        self.bias.fill_(1.0)
            elif init == "normal":
                normal_init_(self.weight)
            elif init == "final":
                final_init_(self.weight)
            else:
                raise ValueError("Invalid init string.")


class StructureModuleTransition(nn.Module):
    """Three-layer residual MLP followed by LayerNorm on the node features."""

    def __init__(self, c):
        """
        Args:
            c: Channel dimension of the input and output features.
        """
        super(StructureModuleTransition, self).__init__()

        self.c = c

        self.linear_1 = Linear(self.c, self.c, init="relu")
        self.linear_2 = Linear(self.c, self.c, init="relu")
        self.linear_3 = Linear(self.c, self.c, init="final")
        self.relu = nn.ReLU()
        self.ln = nn.LayerNorm(self.c)

    def forward(self, s):
        """Return ``LayerNorm(s + MLP(s))``; the output has the shape of ``s``."""
        s_initial = s
        s = self.linear_1(s)
        s = self.relu(s)
        s = self.linear_2(s)
        s = self.relu(s)
        s = self.linear_3(s)
        s = s + s_initial
        s = self.ln(s)

        return s


class EdgeTransition(nn.Module):
    """Update pair features from the current node features.

    Node features are projected down, broadcast to every (i, j) pair,
    concatenated with the incoming edge features and passed through a
    residual MLP, a final projection and a LayerNorm.
    """

    def __init__(
            self,
            *,
            node_embed_size,
            edge_embed_in,
            edge_embed_out,
            num_layers=2,
            node_dilation=2
        ):
        """
        Args:
            node_embed_size: Channel dimension of the node features.
            edge_embed_in: Channel dimension of the incoming edge features.
            edge_embed_out: Channel dimension of the returned edge features.
            num_layers: Number of Linear+ReLU layers in the trunk.
            node_dilation: Divisor applied to ``node_embed_size`` for the
                per-node projection.
        """
        super(EdgeTransition, self).__init__()

        bias_embed_size = node_embed_size // node_dilation
        self.initial_embed = Linear(
            node_embed_size, bias_embed_size, init="relu")
        hidden_size = bias_embed_size * 2 + edge_embed_in
        trunk_layers = []
        for _ in range(num_layers):
            trunk_layers.append(Linear(hidden_size, hidden_size, init="relu"))
            trunk_layers.append(nn.ReLU())
        self.trunk = nn.Sequential(*trunk_layers)
        self.final_layer = Linear(hidden_size, edge_embed_out, init="final")
        self.layer_norm = nn.LayerNorm(edge_embed_out)

    def forward(self, node_embed, edge_embed):
        """
        Args:
            node_embed: ``[batch, num_res, node_embed_size]`` node features.
            edge_embed: ``[batch, num_res, num_res, edge_embed_in]`` features.

        Returns:
            ``[batch, num_res, num_res, edge_embed_out]`` edge features.
        """
        node_embed = self.initial_embed(node_embed)
        batch_size, num_res, _ = node_embed.shape
        # Pair (i, j) receives the projected features of node i and node j.
        edge_bias = torch.cat([
            torch.tile(node_embed[:, :, None, :], (1, 1, num_res, 1)),
            torch.tile(node_embed[:, None, :, :], (1, num_res, 1, 1)),
        ], dim=-1)
        edge_embed = torch.cat(
            [edge_embed, edge_bias], dim=-1).reshape(
                batch_size * num_res**2, -1)
        edge_embed = self.final_layer(self.trunk(edge_embed) + edge_embed)
        edge_embed = self.layer_norm(edge_embed)
        edge_embed = edge_embed.reshape(
            batch_size, num_res, num_res, -1
        )
        return edge_embed
class InvariantPointAttention(nn.Module):
    """
    Implements Algorithm 22.

    Attention over residues whose logits combine a scalar query/key term, a
    bias computed from the pair representation, and a squared-distance term
    between query/key points expressed in the global frame.
    """
    def __init__(
        self,
        ipa_conf,
        inf: float = 1e5,
        eps: float = 1e-8,
    ):
        """
        Args:
            ipa_conf:
                Configuration object; the attributes read here are
                ``c_s`` (single representation channels), ``c_z`` (pair
                representation channels), ``c_hidden`` (hidden channels per
                head), ``no_heads`` (attention heads), ``no_qk_points``
                (query/key points per head) and ``no_v_points`` (value
                points per head).
            inf:
                Magnitude of the negative bias added to masked logits
            eps:
                Added under the square root when computing point norms
        """
        super(InvariantPointAttention, self).__init__()
        self._ipa_conf = ipa_conf

        self.c_s = ipa_conf.c_s
        self.c_z = ipa_conf.c_z
        self.c_hidden = ipa_conf.c_hidden
        self.no_heads = ipa_conf.no_heads
        self.no_qk_points = ipa_conf.no_qk_points
        self.no_v_points = ipa_conf.no_v_points
        self.inf = inf
        self.eps = eps

        # These linear layers differ from their specifications in the
        # supplement. There, they lack bias and use Glorot initialization.
        # Here as in the official source, they have bias and use the default
        # Lecun initialization.
        hc = self.c_hidden * self.no_heads
        self.linear_q = Linear(self.c_s, hc)
        self.linear_kv = Linear(self.c_s, 2 * hc)

        hpq = self.no_heads * self.no_qk_points * 3
        self.linear_q_points = Linear(self.c_s, hpq)

        hpkv = self.no_heads * (self.no_qk_points + self.no_v_points) * 3
        self.linear_kv_points = Linear(self.c_s, hpkv)

        self.linear_b = Linear(self.c_z, self.no_heads)
        # The pair representation is projected to c_z // 4 channels before
        # being aggregated with the attention weights.
        self.down_z = Linear(self.c_z, self.c_z // 4)

        # One learnable weight per head, initialised so softplus(w) == 1.
        self.head_weights = nn.Parameter(torch.zeros((ipa_conf.no_heads)))
        ipa_point_weights_init_(self.head_weights)

        # Per head: pair features, scalar values, and for every value point
        # its 3 coordinates plus its norm.
        concat_out_dim = (
            self.c_z // 4 + self.c_hidden + self.no_v_points * 4
        )
        self.linear_out = Linear(self.no_heads * concat_out_dim, self.c_s, init="final")

        self.softmax = nn.Softmax(dim=-1)
        self.softplus = nn.Softplus()

    def forward(
        self,
        s: torch.Tensor,
        z: Optional[torch.Tensor],
        r: Rigid,
        mask: torch.Tensor,
        _offload_inference: bool = False,
        _z_reference_list: Optional[Sequence[torch.Tensor]] = None,
    ) -> torch.Tensor:
        """
        Args:
            s:
                [*, N_res, C_s] single representation
            z:
                [*, N_res, N_res, C_z] pair representation
            r:
                [*, N_res] transformation object
            mask:
                [*, N_res] mask
            _offload_inference:
                When True, ``z`` is ignored and the pair representation is
                taken from ``_z_reference_list[0]``, which is moved to the
                CPU while the point attention is computed.
            _z_reference_list:
                Mutable one-element sequence holding the pair
                representation; only used when ``_offload_inference`` is set.
        Returns:
            [*, N_res, C_s] single representation update
        """
        if _offload_inference:
            z = _z_reference_list
        else:
            z = [z]

        #######################################
        # Generate scalar and point activations
        #######################################
        # [*, N_res, H * C_hidden]
        q = self.linear_q(s)
        kv = self.linear_kv(s)

        # [*, N_res, H, C_hidden]
        q = q.view(q.shape[:-1] + (self.no_heads, -1))

        # [*, N_res, H, 2 * C_hidden]
        kv = kv.view(kv.shape[:-1] + (self.no_heads, -1))

        # [*, N_res, H, C_hidden]
        k, v = torch.split(kv, self.c_hidden, dim=-1)

        # [*, N_res, H * P_q * 3]
        q_pts = self.linear_q_points(s)

        # This is kind of clunky, but it's how the original does it
        # [*, N_res, H * P_q, 3]
        q_pts = torch.split(q_pts, q_pts.shape[-1] // 3, dim=-1)
        q_pts = torch.stack(q_pts, dim=-1)
        # Map the points through the per-residue transformation r.
        q_pts = r[..., None].apply(q_pts)

        # [*, N_res, H, P_q, 3]
        q_pts = q_pts.view(
            q_pts.shape[:-2] + (self.no_heads, self.no_qk_points, 3)
        )

        # [*, N_res, H * (P_q + P_v) * 3]
        kv_pts = self.linear_kv_points(s)

        # [*, N_res, H * (P_q + P_v), 3]
        kv_pts = torch.split(kv_pts, kv_pts.shape[-1] // 3, dim=-1)
        kv_pts = torch.stack(kv_pts, dim=-1)
        kv_pts = r[..., None].apply(kv_pts)

        # [*, N_res, H, (P_q + P_v), 3]
        kv_pts = kv_pts.view(kv_pts.shape[:-2] + (self.no_heads, -1, 3))

        # [*, N_res, H, P_q/P_v, 3]
        k_pts, v_pts = torch.split(
            kv_pts, [self.no_qk_points, self.no_v_points], dim=-2
        )

        ##########################
        # Compute attention scores
        ##########################
        # [*, N_res, N_res, H]
        b = self.linear_b(z[0])

        if(_offload_inference):
            # Park the pair representation on the CPU until it is needed
            # again for the pair output below.
            z[0] = z[0].cpu()

        # [*, H, N_res, N_res]
        a = torch.matmul(
            permute_final_dims(q, (1, 0, 2)),  # [*, H, N_res, C_hidden]
            permute_final_dims(k, (1, 2, 0)),  # [*, H, C_hidden, N_res]
        )
        a *= math.sqrt(1.0 / (3 * self.c_hidden))
        a += (math.sqrt(1.0 / 3) * permute_final_dims(b, (2, 0, 1)))

        # [*, N_res, N_res, H, P_q, 3]
        pt_displacement = q_pts.unsqueeze(-4) - k_pts.unsqueeze(-5)
        pt_att = pt_displacement ** 2

        # [*, N_res, N_res, H, P_q]
        pt_att = sum(torch.unbind(pt_att, dim=-1))
        head_weights = self.softplus(self.head_weights).view(
            *((1,) * len(pt_att.shape[:-2]) + (-1, 1))
        )
        head_weights = head_weights * math.sqrt(
            1.0 / (3 * (self.no_qk_points * 9.0 / 2))
        )
        pt_att = pt_att * head_weights

        # [*, N_res, N_res, H]
        pt_att = torch.sum(pt_att, dim=-1) * (-0.5)
        # [*, N_res, N_res]
        square_mask = mask.unsqueeze(-1) * mask.unsqueeze(-2)
        # 0 where both residues are valid, -inf (finite stand-in) elsewhere.
        square_mask = self.inf * (square_mask - 1)

        # [*, H, N_res, N_res]
        pt_att = permute_final_dims(pt_att, (2, 0, 1))

        a = a + pt_att
        a = a + square_mask.unsqueeze(-3)
        a = self.softmax(a)

        ################
        # Compute output
        ################
        # [*, N_res, H, C_hidden]
        o = torch.matmul(
            a, v.transpose(-2, -3)
        ).transpose(-2, -3)

        # [*, N_res, H * C_hidden]
        o = flatten_final_dims(o, 2)

        # [*, H, 3, N_res, P_v]
        o_pt = torch.sum(
            (
                a[..., None, :, :, None]
                * permute_final_dims(v_pts, (1, 3, 0, 2))[..., None, :, :]
            ),
            dim=-2,
        )

        # [*, N_res, H, P_v, 3]
        o_pt = permute_final_dims(o_pt, (2, 0, 3, 1))
        # Express the aggregated points in each residue's own frame again.
        o_pt = r[..., None, None].invert_apply(o_pt)

        # [*, N_res, H * P_v]
        o_pt_dists = torch.sqrt(torch.sum(o_pt ** 2, dim=-1) + self.eps)
        o_pt_norm_feats = flatten_final_dims(
            o_pt_dists, 2)

        # [*, N_res, H * P_v, 3]
        o_pt = o_pt.reshape(*o_pt.shape[:-3], -1, 3)

        if(_offload_inference):
            z[0] = z[0].to(o_pt.device)

        # [*, N_res, N_res, C_z // 4]
        pair_z = self.down_z(z[0])
        # [*, N_res, H, C_z // 4]
        o_pair = torch.matmul(a.transpose(-2, -3), pair_z)

        # [*, N_res, H * C_z // 4]
        o_pair = flatten_final_dims(o_pair, 2)

        o_feats = [o, *torch.unbind(o_pt, dim=-1), o_pt_norm_feats, o_pair]

        # [*, N_res, C_s]
        s = self.linear_out(
            torch.cat(
                o_feats, dim=-1
            )
        )

        return s
class TorsionAngles(nn.Module):
    """Predict torsion outputs from node features.

    The head emits ``num_torsions * 2`` values per residue and additionally
    returns them divided by their L2 norm over the last dimension.
    """

    def __init__(self, c, num_torsions, eps=1e-8):
        """
        Args:
            c: Channel dimension of the node features.
            num_torsions: Number of torsions; the head emits twice as many values.
            eps: Lower clamp applied to the squared norm before the square root.
        """
        super().__init__()

        self.c = c
        self.eps = eps
        self.num_torsions = num_torsions

        self.linear_1 = Linear(self.c, self.c, init="relu")
        self.linear_2 = Linear(self.c, self.c, init="relu")
        # TODO: Remove after published checkpoint is updated without these weights.
        self.linear_3 = Linear(self.c, self.c, init="final")
        self.linear_final = Linear(
            self.c, self.num_torsions * 2, init="final")

        self.relu = nn.ReLU()

    def forward(self, s):
        """
        Args:
            s: ``[*, c]`` node features.

        Returns:
            Tuple ``(unnormalized, normalized)``, both ``[*, num_torsions * 2]``.
        """
        hidden = self.linear_2(self.relu(self.linear_1(s)))
        raw = self.linear_final(hidden + s)
        squared_norm = torch.sum(raw ** 2, dim=-1, keepdim=True)
        denom = torch.sqrt(torch.clamp(squared_norm, min=self.eps))
        return raw, raw / denom


class RotationVFLayer(nn.Module):
    """Residual MLP head that maps node features to a 6-dimensional output."""

    def __init__(self, dim):
        """
        Args:
            dim: Channel dimension of the node features.
        """
        super().__init__()

        self.linear_1 = Linear(dim, dim, init="relu")
        self.linear_2 = Linear(dim, dim, init="relu")
        self.linear_3 = Linear(dim, dim)
        self.final_linear = Linear(dim, 6, init="final")
        self.relu = nn.ReLU()

    def forward(self, s):
        """Return the ``[*, 6]`` output for ``[*, dim]`` node features ``s``."""
        hidden = self.relu(self.linear_1(s))
        hidden = self.relu(self.linear_2(hidden))
        hidden = self.linear_3(hidden)
        return self.final_linear(hidden + s)


class BackboneUpdate(nn.Module):
    """
    Implements part of Algorithm 23.
    """

    def __init__(self, c_s, use_rot_updates):
        """
        Args:
            c_s:
                Single representation channel dimension
            use_rot_updates:
                When truthy the layer emits 6 values per residue, otherwise 3
        """
        super().__init__()

        self.c_s = c_s
        self._use_rot_updates = use_rot_updates
        if use_rot_updates:
            update_dim = 6
        else:
            update_dim = 3
        self.linear = Linear(self.c_s, update_dim, init="final")

    def forward(self, s: torch.Tensor):
        """
        Args:
            [*, N_res, C_s] single representation
        Returns:
            [*, N_res, 6] update vector ([*, N_res, 3] without rotation updates)
        """
        return self.linear(s)
class IpaScore(nn.Module):
    """IPA trunk that refines noisy frames and converts them into scores.

    Each block applies invariant point attention, a sequence transformer
    with a skip connection to the initial node embedding, a node transition
    and a backbone (frame) update; all blocks but the last also update the
    edge features.  Rotation and translation scores are obtained from the
    ``diffuser`` by comparing the initial frames with the refined frames.
    """

    def __init__(self, model_conf, diffuser):
        """
        Args:
            model_conf: Configuration providing ``ipa`` (IPA settings),
                ``node_embed_size`` and ``edge_embed_size``.
            diffuser: Object providing ``calc_rot_score`` and
                ``calc_trans_score``.
        """
        super(IpaScore, self).__init__()
        self._model_conf = model_conf
        ipa_conf = model_conf.ipa
        self._ipa_conf = ipa_conf
        self.diffuser = diffuser

        # Translations are multiplied by coordinate_scaling inside the trunk
        # and divided by it again before the translation score is computed.
        self.scale_pos = lambda x: x * ipa_conf.coordinate_scaling
        self.scale_rigids = lambda x: x.apply_trans_fn(self.scale_pos)

        self.unscale_pos = lambda x: x / ipa_conf.coordinate_scaling
        self.unscale_rigids = lambda x: x.apply_trans_fn(self.unscale_pos)
        self.trunk = nn.ModuleDict()

        for b in range(ipa_conf.num_blocks):
            self.trunk[f'ipa_{b}'] = InvariantPointAttention(ipa_conf)
            self.trunk[f'ipa_ln_{b}'] = nn.LayerNorm(ipa_conf.c_s)
            self.trunk[f'skip_embed_{b}'] = Linear(
                self._model_conf.node_embed_size,
                self._ipa_conf.c_skip,
                init="final"
            )
            tfmr_in = ipa_conf.c_s + self._ipa_conf.c_skip
            tfmr_layer = torch.nn.TransformerEncoderLayer(
                d_model=tfmr_in,
                nhead=ipa_conf.seq_tfmr_num_heads,
                dim_feedforward=tfmr_in,
                batch_first=True,
                dropout=0.0,
                norm_first=False
            )
            self.trunk[f'seq_tfmr_{b}'] = torch.nn.TransformerEncoder(
                tfmr_layer, ipa_conf.seq_tfmr_num_layers)
            self.trunk[f'post_tfmr_{b}'] = Linear(
                tfmr_in, ipa_conf.c_s, init="final")
            self.trunk[f'node_transition_{b}'] = StructureModuleTransition(
                c=ipa_conf.c_s)
            # BackboneUpdate requires use_rot_updates; the update is fed to
            # compose_q_update_vec below, so the 6-value form is requested.
            self.trunk[f'bb_update_{b}'] = BackboneUpdate(
                ipa_conf.c_s, use_rot_updates=True)

            if b < ipa_conf.num_blocks-1:
                # No edge update on the last block.
                edge_in = self._model_conf.edge_embed_size
                self.trunk[f'edge_transition_{b}'] = EdgeTransition(
                    node_embed_size=ipa_conf.c_s,
                    edge_embed_in=edge_in,
                    edge_embed_out=self._model_conf.edge_embed_size,
                )

        self.torsion_pred = TorsionAngles(ipa_conf.c_s, 1)

    def forward(self, init_node_embed, edge_embed, input_feats):
        """
        Args:
            init_node_embed: Initial node embedding, one row per residue.
            edge_embed: Initial edge (pair) embedding.
            input_feats: Mapping with the keys ``res_mask``, ``fixed_mask``,
                ``rigids_t`` (frames in 7-value tensor form) and ``t``.

        Returns:
            Dict with ``psi`` (normalized torsion prediction), ``rot_score``,
            ``trans_score`` and ``final_rigids`` (refined, unscaled frames).
        """
        node_mask = input_feats['res_mask'].type(torch.float32)
        # Only residues that are neither fixed nor padding receive updates.
        diffuse_mask = (1 - input_feats['fixed_mask'].type(torch.float32)) * node_mask
        edge_mask = node_mask[..., None] * node_mask[..., None, :]
        # Boolean padding mask: True marks positions attention must ignore.
        # A float mask would be added to the logits instead of masking them.
        padding_mask = (1 - node_mask).bool()
        init_frames = input_feats['rigids_t'].type(torch.float32)

        curr_rigids = Rigid.from_tensor_7(torch.clone(init_frames))
        init_rigids = Rigid.from_tensor_7(init_frames)

        # Main trunk
        curr_rigids = self.scale_rigids(curr_rigids)
        init_node_embed = init_node_embed * node_mask[..., None]
        node_embed = init_node_embed * node_mask[..., None]
        for b in range(self._ipa_conf.num_blocks):
            ipa_embed = self.trunk[f'ipa_{b}'](
                node_embed,
                edge_embed,
                curr_rigids,
                node_mask)
            ipa_embed *= node_mask[..., None]
            node_embed = self.trunk[f'ipa_ln_{b}'](node_embed + ipa_embed)
            seq_tfmr_in = torch.cat([
                node_embed, self.trunk[f'skip_embed_{b}'](init_node_embed)
            ], dim=-1)
            seq_tfmr_out = self.trunk[f'seq_tfmr_{b}'](
                seq_tfmr_in, src_key_padding_mask=padding_mask)
            node_embed = node_embed + self.trunk[f'post_tfmr_{b}'](seq_tfmr_out)
            node_embed = self.trunk[f'node_transition_{b}'](node_embed)
            node_embed = node_embed * node_mask[..., None]
            rigid_update = self.trunk[f'bb_update_{b}'](
                node_embed * diffuse_mask[..., None])
            curr_rigids = curr_rigids.compose_q_update_vec(
                rigid_update, diffuse_mask[..., None])

            if b < self._ipa_conf.num_blocks-1:
                edge_embed = self.trunk[f'edge_transition_{b}'](
                    node_embed, edge_embed)
                edge_embed *= edge_mask[..., None]
        rot_score = self.diffuser.calc_rot_score(
            init_rigids.get_rots(),
            curr_rigids.get_rots(),
            input_feats['t']
        )
        rot_score = rot_score * node_mask[..., None]

        curr_rigids = self.unscale_rigids(curr_rigids)
        trans_score = self.diffuser.calc_trans_score(
            init_rigids.get_trans(),
            curr_rigids.get_trans(),
            input_feats['t'][:, None, None],
            use_torch=True,
        )
        trans_score = trans_score * node_mask[..., None]
        _, psi_pred = self.torsion_pred(node_embed)
        model_out = {
            'psi': psi_pred,
            'rot_score': rot_score,
            'trans_score': trans_score,
            'final_rigids': curr_rigids,
        }
        return model_out
class NodeEmbedder(nn.Module):
    """Per-residue embedding built from amino-acid type, local atom
    coordinates and backbone dihedral angles."""

    def __init__(self, feat_dim, max_num_atoms, max_aa_types=22):
        """
        Args:
            feat_dim: Size of the returned per-residue feature vector.
            max_num_atoms: Number of leading atoms per residue that are kept.
            max_aa_types: Size of the amino-acid vocabulary.
        """
        super().__init__()
        self.max_num_atoms = max_num_atoms
        self.max_aa_types = max_aa_types
        self.feat_dim = feat_dim
        self.aatype_embed = nn.Embedding(self.max_aa_types, feat_dim)
        self.dihed_embed = AngularEncoding()

        # amino-acid embedding + one coordinate slot per (aa type, atom, xyz)
        # + the encoding of the 3 backbone dihedrals.
        infeat_dim = feat_dim + (self.max_aa_types*max_num_atoms*3) + self.dihed_embed.get_out_dim(3)
        self.mlp = nn.Sequential(
            nn.Linear(infeat_dim, feat_dim * 2), nn.ReLU(),
            nn.Linear(feat_dim * 2, feat_dim), nn.ReLU(),
            nn.Linear(feat_dim, feat_dim), nn.ReLU(),
            nn.Linear(feat_dim, feat_dim)
        )

    def forward(self, aa, res_nb, chain_nb, pos_atoms, mask_atoms, structure_mask=None, sequence_mask=None):
        """
        Args:
            aa: (N, L).
            res_nb: (N, L).
            chain_nb: (N, L).
            pos_atoms: (N, L, A, 3).
            mask_atoms: (N, L, A).
            structure_mask: (N, L), mask out unknown structures to generate.
            sequence_mask: (N, L), mask out unknown amino acids to generate.

        Returns:
            (N, L, feat_dim) residue features, zeroed where the CA atom is
            masked out.
        """
        N, L = aa.size()
        mask_residue = mask_atoms[:, :, BBHeavyAtom.CA] # (N, L)

        # Remove other atoms
        pos_atoms = pos_atoms[:, :, :self.max_num_atoms]
        mask_atoms = mask_atoms[:, :, :self.max_num_atoms]

        # Amino acid identity features
        if sequence_mask is not None:
            # Avoid data leakage at training time
            aa = torch.where(sequence_mask, aa, torch.full_like(aa, fill_value=AA.UNK))
        aa_feat = self.aatype_embed(aa) # (N, L, feat)

        # Coordinate features
        R = construct_3d_basis(
            pos_atoms[:, :, BBHeavyAtom.CA],
            pos_atoms[:, :, BBHeavyAtom.C],
            pos_atoms[:, :, BBHeavyAtom.N]
        )
        t = pos_atoms[:, :, BBHeavyAtom.CA]
        crd = global_to_local(R, t, pos_atoms) # (N, L, A, 3)
        crd_mask = mask_atoms[:, :, :, None].expand_as(crd)
        crd = torch.where(crd_mask, crd, torch.zeros_like(crd))

        # Place each residue's coordinates in the slot of its own amino-acid
        # type; the slots of all other types stay zero.
        aa_expand = aa[:, :, None, None, None].expand(N, L, self.max_aa_types, self.max_num_atoms, 3)
        rng_expand = torch.arange(0, self.max_aa_types)[None, None, :, None, None].expand(N, L, self.max_aa_types, self.max_num_atoms, 3).to(aa_expand)
        place_mask = (aa_expand == rng_expand)
        crd_expand = crd[:, :, None, :, :].expand(N, L, self.max_aa_types, self.max_num_atoms, 3)
        crd_expand = torch.where(place_mask, crd_expand, torch.zeros_like(crd_expand))
        crd_feat = crd_expand.reshape(N, L, self.max_aa_types*self.max_num_atoms*3)
        if structure_mask is not None:
            # Avoid data leakage at training time
            crd_feat = crd_feat * structure_mask[:, :, None]

        # Backbone dihedral features
        bb_dihedral, mask_bb_dihed = get_backbone_dihedral_angles(pos_atoms, chain_nb=chain_nb, res_nb=res_nb, mask=mask_residue)
        dihed_feat = self.dihed_embed(bb_dihedral[:, :, :, None]) * mask_bb_dihed[:, :, :, None] # (N, L, 3, dihed/3)
        dihed_feat = dihed_feat.reshape(N, L, -1)
        if structure_mask is not None:
            # Avoid data leakage at training time
            # A residue keeps its dihedral features only if it and both
            # sequence neighbours are unmasked; torch.roll wraps around at
            # the ends of the sequence axis.
            dihed_mask = torch.logical_and(
                structure_mask,
                torch.logical_and(
                    torch.roll(structure_mask, shifts=+1, dims=1),
                    torch.roll(structure_mask, shifts=-1, dims=1)
                ),
            ) # Avoid slight data leakage via dihedral angles of anchor residues
            dihed_feat = dihed_feat * dihed_mask[:, :, None]

        out_feat = self.mlp(torch.cat([aa_feat, crd_feat, dihed_feat], dim=-1)) # (N, L, F)
        out_feat = out_feat * mask_residue[:, :, None]

        return out_feat
# Complex ids listed in this file are skipped while the structure cache is built.
_TEST_NAMES_PATH = '/datapool/data2/home/ruihan/data/jiahan/ResProj/PepDiff/pepflowww/Data/names.txt'


def _load_names(path):
    """Read one complex id per line from ``path``.

    A missing or unreadable file yields an empty list (with a warning)
    instead of making the whole module impossible to import.
    """
    try:
        with open(path, 'r') as f:
            return [line.strip() for line in f]
    except OSError as e:
        logging.warning('Could not read test-set names from %s: %s', path, e)
        return []


# testset
names = _load_names(_TEST_NAMES_PATH)


def preprocess_structure(task):
    """Parse one peptide/pocket pair into a single feature dictionary.

    Both structures are translated so that the mean of the peptide's masked
    CA coordinates sits at the origin, torsion angles are computed after the
    translation, and the pocket (first) and peptide (second) features are
    concatenated.

    Args:
        task: Dict with ``id`` (complex id) and ``pdb_path`` (directory that
            contains ``peptide.pdb`` and ``pocket.pdb``).

    Returns:
        The merged feature dict, including ``id`` and a boolean
        ``generate_mask`` that is True on peptide residues; ``None`` when the
        complex is skipped or cannot be parsed (a warning is logged).
    """
    try:
        if task['id'] in names:
            raise ValueError(f'{task["id"]} is listed in the test-set names, skipped')
        pdb_path = task['pdb_path']
        # pep
        # process peptide and find center of mass
        pep = parse_pdb(os.path.join(pdb_path, 'peptide.pdb'))[0]
        # Reject out-of-range peptides before the more expensive steps.
        if len(pep['aa']) < 3 or len(pep['aa']) > 25:
            raise ValueError('peptide length not in [3,25]')
        ca_mask = pep['mask_heavyatom'][:, BBHeavyAtom.CA]
        center = torch.sum(pep['pos_heavyatom'][ca_mask, BBHeavyAtom.CA], dim=0) / (torch.sum(ca_mask) + 1e-8)
        pep['pos_heavyatom'] = pep['pos_heavyatom'] - center[None, None, :]
        pep['torsion_angle'], pep['torsion_angle_mask'] = get_torsion_angle(pep['pos_heavyatom'], pep['aa'])  # calc angles after translation
        # rec
        rec = parse_pdb(os.path.join(pdb_path, 'pocket.pdb'))[0]
        rec['pos_heavyatom'] = rec['pos_heavyatom'] - center[None, None, :]
        rec['torsion_angle'], rec['torsion_angle_mask'] = get_torsion_angle(rec['pos_heavyatom'], rec['aa'])  # calc angles after translation
        rec['chain_nb'] += 1
        # meta data
        data = {}
        data['id'] = task['id']
        data['generate_mask'] = torch.cat([torch.zeros_like(rec['aa']), torch.ones_like(pep['aa'])], dim=0).bool()
        for k in rec.keys():
            if isinstance(rec[k], torch.Tensor):
                data[k] = torch.cat([rec[k], pep[k]], dim=0)
            elif isinstance(rec[k], list):
                data[k] = rec[k] + pep[k]
            else:
                raise ValueError(f'Unknown type of {rec[k]}')
        return data

    except (
        PDBExceptions.PDBConstructionException,
        KeyError,
        ValueError,
        TypeError
    ) as e:
        logging.warning('[{}] {}: {}'.format(
            task['id'],
            e.__class__.__name__,
            str(e)
        ))
        return None


class PepDataset(Dataset):
    """Peptide/pocket complexes served from an LMDB cache.

    The cache is built from ``structure_dir`` on first use (or when
    ``reset`` is set) and afterwards read lazily: the read-only environment
    is opened on first access and reused for later accesses.
    """

    MAP_SIZE = 32*(1024*1024*1024) # 32GB

    def __init__(self, structure_dir = "./Data/PepMerge_new/", dataset_dir = "./Data/",
                 name = 'pep', transform=None, reset=False):
        """
        Args:
            structure_dir: Directory with one sub-directory per complex.
            dataset_dir: Directory that holds the LMDB cache file.
            name: Prefix of the cache file name.
            transform: Optional callable applied to every returned item.
            reset: When True the existing cache is deleted and rebuilt.
        """
        super().__init__()
        self.structure_dir = structure_dir
        self.dataset_dir = dataset_dir
        self.transform = transform
        self.name = name

        self.db_conn = None
        self.db_ids = None
        self._db_pid = None
        self._load_structures(reset)

    @property
    def _cache_db_path(self):
        """Path of the LMDB cache file."""
        return os.path.join(self.dataset_dir, f'{self.name}_structure_cache.lmdb')

    def _connect_db(self):
        """(Re)open the cache read-only and load the list of stored ids."""
        self._close_db()
        self.db_conn = lmdb.open(
            self._cache_db_path,
            map_size=self.MAP_SIZE,
            create=False,
            subdir=False,
            readonly=True,
            lock=False,
            readahead=False,
            meminit=False,
        )
        with self.db_conn.begin() as txn:
            keys = [k.decode() for k in txn.cursor().iternext(values=False)]
            self.db_ids = keys
        self._db_pid = os.getpid()

    def _ensure_db(self):
        """Open the cache if needed.

        Reopening and rescanning all keys on every item access is expensive,
        so the connection is reused; it is only re-established when none
        exists or when the current process differs from the one that opened
        it (e.g. a forked DataLoader worker).
        """
        if self.db_conn is None or getattr(self, '_db_pid', None) != os.getpid():
            self._connect_db()

    def _close_db(self):
        """Close the cache connection, if any, and forget the id list."""
        if self.db_conn is not None:
            self.db_conn.close()
        self.db_conn = None
        self.db_ids = None

    def _load_structures(self, reset):
        """Build the cache when it is missing or ``reset`` is requested."""
        all_pdbs = os.listdir(self.structure_dir)

        if reset:
            if os.path.exists(self._cache_db_path):
                os.remove(self._cache_db_path)
                lock_file = self._cache_db_path + "-lock"
                if os.path.exists(lock_file):
                    os.remove(lock_file)
            self._close_db()
            todo_pdbs = all_pdbs
        else:
            if not os.path.exists(self._cache_db_path):
                todo_pdbs = all_pdbs
            else:
                # An existing cache is used as is; it is not topped up.
                todo_pdbs = []

        if len(todo_pdbs) > 0:
            self._preprocess_structures(todo_pdbs)

    def _preprocess_structures(self, pdb_list):
        """Preprocess ``pdb_list`` in parallel and write the results to LMDB."""
        tasks = []
        for pdb_fname in pdb_list:
            pdb_path = os.path.join(self.structure_dir, pdb_fname)
            tasks.append({
                'id': pdb_fname,
                'pdb_path': pdb_path,
            })

        data_list = joblib.Parallel(
            n_jobs = max(joblib.cpu_count() // 2, 1),
        )(
            joblib.delayed(preprocess_structure)(task)
            for task in tqdm(tasks, dynamic_ncols=True, desc='Preprocess')
        )

        db_conn = lmdb.open(
            self._cache_db_path,
            map_size = self.MAP_SIZE,
            create=True,
            subdir=False,
            readonly=False,
        )
        try:
            with db_conn.begin(write=True, buffers=True) as txn:
                for data in tqdm(data_list, dynamic_ncols=True, desc='Write to LMDB'):
                    if data is None:
                        # Skipped or unparsable complex.
                        continue
                    txn.put(data['id'].encode('utf-8'), pickle.dumps(data))
        finally:
            # Release the write environment so the read-only connection
            # opened later does not coexist with a leaked writer.
            db_conn.close()

    def __len__(self):
        self._ensure_db() # make sure db_ids is not None
        return len(self.db_ids)

    def __getitem__(self, index):
        self._ensure_db()
        item_id = self.db_ids[index]
        with self.db_conn.begin() as txn:
            # The cache is written by this class; do not point it at
            # untrusted files, since unpickling can execute arbitrary code.
            data = pickle.loads(txn.get(item_id.encode()))
        if self.transform is not None:
            data = self.transform(data)
        return data


if __name__ == '__main__':
    device = 'cuda:1'
    config,cfg_name = load_config("./configs/learn/learn_all.yaml")
    dataset = PepDataset(structure_dir = "./Data/PepMerge_new/", dataset_dir = "/Data/Fixed Data",
                         name = 'pep_pocket_test', transform=None, reset=True)
    print(len(dataset))
    print(dataset[0])

    dataloader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=4, collate_fn=PaddingCollate(eight=False))

    batch = next(iter(dataloader))
    print(batch['torsion_angle'].shape)
    print(batch['torsion_angle_mask'].shape)
def sample_for_data_bb(data, model, device, save_root, num_steps=200, sample_structure=True, sample_sequence=True, nums=8):
    """Sample ``nums`` backbone-level designs for one item and write them as PDBs.

    The item is replicated ``nums`` times into a batch, ``model.sample`` is
    run on it, and the last state of the returned trajectory is converted to
    backbone coordinates.  Residues selected by ``generate_mask`` take the
    sampled backbone (first four heavy-atom slots only); all other residues
    keep their input coordinates and atom mask.

    Files written under ``<save_root>/<data['id']>/``:
        ``<id>_<i>.pdb`` for each sample ``i`` and ``<id>_gt.pdb`` for the input.

    Args:
        data: a single, un-batched dataset item (dict).
        model: object exposing ``sample(batch, num_steps=..., sample_structure=...,
            sample_sequence=...)`` whose last trajectory entry provides
            ``'rotmats'``, ``'trans'`` and ``'seqs'``.
        device: device the batch and the final state are moved to.
        save_root: output root directory.
        num_steps, sample_structure, sample_sequence: forwarded to ``model.sample``.
        nums: number of samples to draw.
    """
    out_dir = os.path.join(save_root, data["id"])
    os.makedirs(out_dir, exist_ok=True)

    batch = recursive_to(item_to_batch(data, nums=nums), device=device)
    traj = model.sample(batch, num_steps=num_steps, sample_structure=sample_structure, sample_sequence=sample_sequence)
    final = recursive_to(traj[-1], device=device)

    pos_bb = reconstruct_backbone(R=final['rotmats'], t=final['trans'], aa=final['seqs'],
                                  chain_nb=batch['chain_nb'], res_nb=batch['res_nb'], mask=batch['res_mask'])
    # Pad the atom axis from the 4 backbone atoms up to 15 slots so the result
    # lines up with batch['pos_heavyatom'] in torch.where below.
    pos_ha = F.pad(pos_bb, pad=(0, 0, 0, 15 - 4), value=0.)
    pos_new = torch.where(batch['generate_mask'][:, :, None, None], pos_ha, batch['pos_heavyatom'])

    # Generated residues expose only their first four atom slots.
    mask_bb_atoms = torch.zeros_like(batch['mask_heavyatom'])
    mask_bb_atoms[:, :, :4] = True
    mask_new = torch.where(batch['generate_mask'][:, :, None], mask_bb_atoms, batch['mask_heavyatom'])
    aa_new = final['seqs']

    # The original also computed several unused locals here; one of them
    # indexed data['pos_heavyatom'] by sample index and could raise IndexError
    # when nums exceeded the number of residues.  They have been removed.
    for i in range(nums):
        data_saved = {
            'chain_nb': data['chain_nb'], 'chain_id': data['chain_id'], 'resseq': data['resseq'], 'icode': data['icode'],
            'aa': aa_new[i].cpu(), 'mask_heavyatom': mask_new[i].cpu(), 'pos_heavyatom': pos_new[i].cpu(),
        }
        save_pdb(data_saved, path=os.path.join(out_dir, f'{data["id"]}_{i}.pdb'))
    save_pdb(data, path=os.path.join(out_dir, f'{data["id"]}_gt.pdb'))
def save_samples_sc(samples,save_dir):
    """Write full-atom (side-chain level) samples and the ground truth as PDB files.

    Writes ``sample_<i>.pdb`` for every batch entry and one ``gt.pdb`` (taken
    from batch entry 0) into ``save_dir``.

    Args:
        samples: dict with the sampled ``'rotmats'``, ``'trans'``, ``'angles'``
            and ``'seqs'`` plus the input ``'batch'`` they were generated from.
        save_dir: existing directory the PDB files are written to.
    """
    # meta data
    batch = recursive_to(samples['batch'],'cpu')
    chain_id = [list(item) for item in zip(*batch['chain_id'])][0] # fix chain id in collate func
    icode = [' ' for _ in range(len(chain_id))] # batch icode have same problem
    nums = len(batch['id'])
    generate_mask = batch['generate_mask']

    # batch convert
    pos_ha,_,_ = full_atom_reconstruction(R_bb=samples['rotmats'],t_bb=samples['trans'],angles=samples['angles'],aa=samples['seqs'])
    # Pad the atom axis from 14 to 15 slots to match batch['pos_heavyatom'].
    pos_ha = F.pad(pos_ha, pad=(0,0,0,15-14), value=0.)
    pos_new = torch.where(generate_mask[:,:,None,None],pos_ha,batch['pos_heavyatom'])

    # Generated residues get the template mask of their sampled residue type.
    # Context residues keep their input mask, matching how pos_new is built
    # (and save_samples_bb); otherwise atoms absent from the input would be
    # written out at whatever coordinates their empty slots hold.
    sampled_mask = get_heavyatom_mask(samples['seqs']).to(batch['mask_heavyatom'])
    mask_new = torch.where(generate_mask[:,:,None],sampled_mask,batch['mask_heavyatom'])
    aa_new = samples['seqs']

    for i in range(nums):
        data_saved = {
            'chain_nb':batch['chain_nb'][0],'chain_id':chain_id,'resseq':batch['resseq'][0],'icode':icode,
            'aa':aa_new[i], 'mask_heavyatom':mask_new[i], 'pos_heavyatom':pos_new[i],
        }
        save_pdb(data_saved,path=os.path.join(save_dir,f'sample_{i}.pdb'))

    data_saved = {
        'chain_nb':batch['chain_nb'][0],'chain_id':chain_id,'resseq':batch['resseq'][0],'icode':icode,
        'aa':batch['aa'][0], 'mask_heavyatom':batch['mask_heavyatom'][0], 'pos_heavyatom':batch['pos_heavyatom'][0],
    }
    save_pdb(data_saved,path=os.path.join(save_dir,'gt.pdb'))
100644 index 0000000000000000000000000000000000000000..c450b103d27453475e8ba25c0b598bccbf3dad5d --- /dev/null +++ b/models_con/torsion.py @@ -0,0 +1,239 @@ +import torch +import math + +from typing import Any, Optional, Union, cast + +from pepflow.modules.common.geometry import * +import pepflow.modules.protein.constants as constants + +""" +calc torsion angles between (0,2pi) +""" + +def _get_torsion(p0, p1, p2, p3): + """ + Args: + p0-3: (*, 3). + Returns: + Dihedral angles in radian, (*, ). + """ + v0 = p2 - p1 + v1 = p0 - p1 + v2 = p3 - p2 + u1 = torch.cross(v0, v1, dim=-1) + n1 = u1 / torch.linalg.norm(u1, dim=-1, keepdim=True) + u2 = torch.cross(v0, v2, dim=-1) + n2 = u2 / torch.linalg.norm(u2, dim=-1, keepdim=True) + sgn = torch.sign( (torch.cross(v1, v2, dim=-1) * v0).sum(-1) ) + dihed = sgn*torch.acos( (n1 * n2).sum(-1).clamp(min=-0.999999, max=0.999999)) + return dihed + +def get_chi_angles(restype, pos14): + chi_angles = torch.full([4], fill_value=float("inf")).to(pos14) + base_atom_names = constants.chi_angles_atoms[restype] + for i, four_atom_names in enumerate(base_atom_names): + atom_indices = [constants.restype_atom14_name_to_index[restype][a] for a in four_atom_names] + p = torch.stack([pos14[i] for i in atom_indices]) + # if torch.eq(p, 99999).any(): + # continue + torsion = _get_torsion(*torch.unbind(p, dim=0)) + chi_angles[i] = torsion + return chi_angles + + +def get_psi_angle(pos14: torch.Tensor) -> torch.Tensor: + return _get_torsion(pos14[0], pos14[1], pos14[2], pos14[3]).reshape([1]) # af style psi, N,CA,C,O + + +def get_torsion_angle(pos14: torch.Tensor, aa: torch.LongTensor): + torsion, torsion_mask = [], [] + for i in range(pos14.shape[0]): + if aa[i] < constants.AA.UNK: # 0-19 + chi = get_chi_angles(aa[i].item(), pos14[i]) + psi = get_psi_angle(pos14[i]) + torsion_this = torch.cat([psi, chi], dim=0) + torsion_mask_this = torsion_this.isfinite() + else: + torsion_this = torch.full([5], 0.) 
+ torsion_mask_this = torch.full([5], False) + torsion.append(torsion_this.nan_to_num(posinf=0.)) + torsion_mask.append(torsion_mask_this) + + torsion = torch.stack(torsion) % (2*math.pi) + torsion_mask = torch.stack(torsion_mask).bool() + + return torsion, torsion_mask + +def _make_psi_chi_rotation_matrices(angles: torch.Tensor) -> torch.Tensor: + """Compute psi and chi rotation matrices from torsional angles. + + Here we provide angles instead of alpha in af2 between (0,2pi) + + See alphafold supplementary Algorithm 25 for details. + + Args: + angles: (B, N, 5), angles between (0,2pi) + + Returns: + Torsional angle rotation matrices, (B, N, 5, 3, 3). + """ + batch_size, n_res = angles.shape[:2] + sine,cosine = torch.sin(angles), torch.cos(angles) + sine = sine.reshape(batch_size, n_res, -1, 1, 1) + cosine = cosine.reshape(batch_size, n_res, -1, 1, 1) + zero = torch.zeros_like(sine) + one = torch.ones_like(sine) + + row1 = torch.cat([one, zero, zero], dim=-1) # (B, N, 5, 1, 3) + row2 = torch.cat([zero, cosine, -sine], dim=-1) # (B, N, 5, 1, 3) + row3 = torch.cat([zero, sine, cosine], dim=-1) # (B, N, 5, 1, 3) + R = torch.cat([row1, row2, row3], dim=-2) # (B, N, 5, 3, 3) + + return R + + +def _get_rigid_group(aa: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """Extract rigid group constants. + + Args: + aa: Amino acid types, (B, N). + + Returns: + A tuple of rigid group rotation, translation, atom14 group and atom14 position. 
+ """ + batch_size, n_res = aa.size() + aa = aa.flatten() + rotation = constants.restype_rigid_group_rotation.to(aa.device)[aa].reshape(batch_size, n_res, 8, 3, 3) + translation = constants.restype_rigid_group_translation.to(aa.device)[aa].reshape(batch_size, n_res, 8, 3) + atom14_group = constants.restype_heavyatom_to_rigid_group.to(aa.device)[aa].reshape(batch_size, n_res, 14) + atom14_position = constants.restype_heavyatom_rigid_group_positions.to(aa.device)[aa].reshape( + batch_size, n_res, 14, 3 + ) + return rotation, translation, atom14_group, atom14_position + + +# construct heavy atom masks for genrating +# restype_to_heavyatom_masks = { +# restype: [name != "" and name !='OXT' for name in names] +# for restype, names in constants.restype_to_heavyatom_names.items() +# } +# print(restype_to_heavyatom_masks[0]) + +restype_to_heavyatom_masks = torch.zeros([22,15]).bool() +for i in range(21): + restype_to_heavyatom_masks[i] = torch.tensor([name != "" and name !='OXT' for name in constants.restype_to_heavyatom_names[i]]).bool() + +def get_heavyatom_mask(aa: torch.Tensor) -> torch.Tensor: + """Compute heavy atom masks from amino acid types. + + Args: + aa: Amino acid types, (B, N). + + Returns: + Heavy atom masks, (B, N, 15). + """ + batch_size, n_res = aa.size() + aa = aa.flatten() + mask = restype_to_heavyatom_masks.to(aa.device)[aa].reshape(batch_size, n_res, 15) + return mask + +def full_atom_reconstruction( + R_bb: torch.Tensor, + t_bb: torch.Tensor, + angles: torch.Tensor, + aa: torch.Tensor, +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """Compute full atom positions from backbone frames and torsional angles. + + See alphafold supplementary Algorithm 24 for details. + + Args: + R_bb: Rotation of backbone frames, (B, N, 3, 3). + t_bb: Translation of backbone frames, (B, N, 3). + angles: (B, N, 5), angles between (0,2pi) + aa: Amino acid types, (B, N). + + Returns: + A tuple of atom positions and full frames, (pos14, R, t). 
+ pos14: Full atom positions in pos14 representations, (B, N, 14, 3). + R: Rotation of backbone, psi, chi1-4 frames, (B, N, 5, 3, 3). + t: Rotation of backbone, psi, chi1-4 frames, (B, N, 5, 3). + """ + N, L = aa.size() + + rot_psi, rot_chi1, rot_chi2, rot_chi3, rot_chi4 = _make_psi_chi_rotation_matrices(angles).unbind(dim=2) + # (B, N, 3, 3) + zeros = torch.zeros_like(t_bb) + + rigid_rotation, rigid_translation, atom14_group, atom14_position = _get_rigid_group(aa) + + R_psi, t_psi = compose_chain( + [ + (R_bb, t_bb), + (rigid_rotation[:, :, constants.PSI_FRAME], rigid_translation[:, :, constants.PSI_FRAME]), + (rot_psi, zeros), + ] + ) + + R_chi1, t_chi1 = compose_chain( + [ + (R_bb, t_bb), + (rigid_rotation[:, :, constants.CHI1_FRAME], rigid_translation[:, :, constants.CHI1_FRAME]), + (rot_chi1, zeros), + ] + ) + + R_chi2, t_chi2 = compose_chain( + [ + (R_chi1, t_chi1), + (rigid_rotation[:, :, constants.CHI2_FRAME], rigid_translation[:, :, constants.CHI2_FRAME]), + (rot_chi2, zeros), + ] + ) + + R_chi3, t_chi3 = compose_chain( + [ + (R_chi2, t_chi2), + (rigid_rotation[:, :, constants.CHI3_FRAME], rigid_translation[:, :, constants.CHI3_FRAME]), + (rot_chi3, zeros), + ] + ) + + R_chi4, t_chi4 = compose_chain( + [ + (R_chi3, t_chi3), + (rigid_rotation[:, :, constants.CHI4_FRAME], rigid_translation[:, :, constants.CHI4_FRAME]), + (rot_chi4, zeros), + ] + ) + + # Return Frame + R_ret = torch.stack([R_bb, R_psi, R_chi1, R_chi2, R_chi3, R_chi4], dim=2) + t_ret = torch.stack([t_bb, t_psi, t_chi1, t_chi2, t_chi3, t_chi4], dim=2) + + # Backbone, Omega, Phi, Psi, Chi1,2,3,4 + R_all = torch.stack([R_bb, R_bb, R_bb, R_psi, R_chi1, R_chi2, R_chi3, R_chi4], dim=2) # (B, N, 8, 3, 3) + t_all = torch.stack([t_bb, t_bb, t_bb, t_psi, t_chi1, t_chi2, t_chi3, t_chi4], dim=2) # (B, N, 8, 3) + + index_R = atom14_group.reshape(N, L, 14, 1, 1).repeat(1, 1, 1, 3, 3) # (B, N, 14, 3, 3) + index_t = atom14_group.reshape(N, L, 14, 1).repeat(1, 1, 1, 3) # (B, N, 14, 3) + + R_atom = 
def tor_expmap(x: torch.Tensor, u: torch.Tensor) -> torch.Tensor:
    """Move from angles ``x`` along tangent ``u`` and wrap the result into [0, 2*pi)."""
    moved = x + u
    return moved % (2 * math.pi)

def tor_logmap(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    """Wrapped angular difference ``y - x``, computed through atan2(sin, cos)."""
    delta = y - x
    return torch.atan2(torch.sin(delta), torch.cos(delta))

def tor_projx(x: torch.Tensor) -> torch.Tensor:
    """Wrap angles into [0, 2*pi)."""
    return x % (2 * math.pi)

def tor_random_uniform(*size, dtype=None, device=None) -> torch.Tensor:
    """Draw angles uniformly from [0, 2*pi); arguments are forwarded to ``torch.rand``."""
    return torch.rand(*size, dtype=dtype, device=device) * 2 * math.pi

def tor_uniform_logprob(x):
    """Log-density of the uniform distribution on the torus spanned by the last axis of ``x``.

    Returns a tensor shaped like ``x[..., 0]`` filled with ``-dim * log(2*pi)``.
    """
    n_angles = x.shape[-1]
    log_density = -n_angles * math.log(2 * math.pi)
    return torch.full_like(x[..., 0], log_density)

def tor_geodesic_t(t, angles_1, angles_0):
    """Point at fraction ``t`` along the wrapped path from ``angles_0`` (base) to ``angles_1`` (target)."""
    step = t * tor_logmap(angles_0, angles_1)
    return tor_expmap(angles_0, step)

if __name__ =='__main__':
    a = tor_random_uniform((2,3,5))
    b = tor_random_uniform((2,3,5))
    t = torch.ones((2,1)) * 0.2
    c = tor_geodesic_t(t[...,None],a,b)
    print(c)
    print(c.shape)
def process_dic(state_dict):
    """Return a copy of ``state_dict`` with a leading ``'module.'`` removed from keys.

    Only a literal ``'module.'`` prefix is stripped.  Previously any key that
    merely contained the substring ``'module'`` lost its first seven
    characters, which corrupted unrelated parameter names.

    Args:
        state_dict: mapping from parameter name to value.

    Returns:
        A new dict with the same values and insertion order.
    """
    prefix = 'module.'
    return {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }


def calc_distogram(pos, min_bin, max_bin, num_bins):
    """One-hot distance histogram over all pairs of positions.

    Args:
        pos: (B, N, 3) coordinates.
        min_bin: lower edge of the first bin.
        max_bin: lower edge of the last bin.
        num_bins: number of bins.

    Returns:
        (B, N, N, num_bins) tensor of ``pos.dtype``.  Both bin edges are
        exclusive, so a distance equal to an edge (or below ``min_bin``)
        falls into no bin; the last bin extends up to 1e8.
    """
    dists_2d = torch.linalg.norm(
        pos[:, :, None, :] - pos[:, None, :, :], dim=-1)[..., None]
    lower = torch.linspace(
        min_bin,
        max_bin,
        num_bins,
        device=pos.device)
    upper = torch.cat([lower[1:], lower.new_tensor([1e8])], dim=-1)
    dgram = ((dists_2d > lower) * (dists_2d < upper)).type(pos.dtype)
    return dgram
def get_time_embedding(timesteps, embedding_dim, max_positions=2000):
    """Sinusoidal embedding of a 1-D tensor of timesteps.

    Code from https://github.com/hojonathanho/diffusion/blob/master/diffusion_tf/nn.py

    Args:
        timesteps: 1-D tensor; it is multiplied by ``max_positions`` before
            being embedded.
        embedding_dim: size of the returned embedding.
        max_positions: scale applied to ``timesteps`` and base of the
            frequency range.

    Returns:
        Tensor of shape (len(timesteps), embedding_dim): sines in the first
        half, cosines in the second, plus one zero column when
        ``embedding_dim`` is odd.
    """
    assert len(timesteps.shape) == 1
    timesteps = timesteps * max_positions
    half_dim = embedding_dim // 2
    # With a single frequency (embedding_dim 2 or 3) ``half_dim - 1`` is 0 and
    # the original division raised ZeroDivisionError; the denominator is
    # irrelevant there because the only exponent is 0.
    log_step = math.log(max_positions) / max(half_dim - 1, 1)
    freqs = torch.exp(torch.arange(half_dim, dtype=torch.float32, device=timesteps.device) * -log_step)
    emb = timesteps.float()[:, None] * freqs[None, :]
    emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
    if embedding_dim % 2 == 1:  # zero pad
        emb = F.pad(emb, (0, 1), mode='constant')
    assert emb.shape == (timesteps.shape[0], embedding_dim)
    return emb
class Dropout(nn.Module):
    """
    Implementation of dropout with the ability to share the dropout mask
    along a particular dimension.

    If not in training mode, this module computes the identity function.
    """

    def __init__(self, r: float, batch_dim: Union[int, List[int]]):
        """
        Args:
            r:
                Dropout rate
            batch_dim:
                Dimension(s) along which the dropout mask is shared
        """
        super(Dropout, self).__init__()

        self.r = r
        if isinstance(batch_dim, int):
            batch_dim = [batch_dim]
        self.batch_dim = batch_dim
        self.dropout = nn.Dropout(self.r)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x:
                Tensor to which dropout is applied. Can have any shape
                compatible with self.batch_dim
        Returns:
            A new tensor of the same shape as x; x itself is left untouched.
        """
        shape = list(x.shape)
        if self.batch_dim is not None:
            # Size-1 mask axes broadcast, i.e. the mask is shared along them.
            for bd in self.batch_dim:
                shape[bd] = 1
        mask = x.new_ones(shape)
        mask = self.dropout(mask)
        # Out-of-place on purpose: ``x *= mask`` silently modified the
        # caller's tensor and raises for leaf tensors that require grad.
        return x * mask


class DropoutRowwise(Dropout):
    """
    Convenience class for rowwise dropout as described in subsection
    1.11.6.
    """

    __init__ = partialmethod(Dropout.__init__, batch_dim=-3)


class DropoutColumnwise(Dropout):
    """
    Convenience class for columnwise dropout as described in subsection
    1.11.6.
    """

    __init__ = partialmethod(Dropout.__init__, batch_dim=-2)
+ """ + + def __init__( + self, + tf_dim: int, + msa_dim: int, + c_z: int, + c_m: int, + relpos_k: int, + **kwargs, + ): + """ + Args: + tf_dim: + Final dimension of the target features + msa_dim: + Final dimension of the MSA features + c_z: + Pair embedding dimension + c_m: + MSA embedding dimension + relpos_k: + Window size used in relative positional encoding + """ + super(InputEmbedder, self).__init__() + + self.tf_dim = tf_dim + self.msa_dim = msa_dim + + self.c_z = c_z + self.c_m = c_m + + self.linear_tf_z_i = Linear(tf_dim, c_z) + self.linear_tf_z_j = Linear(tf_dim, c_z) + self.linear_tf_m = Linear(tf_dim, c_m) + self.linear_msa_m = Linear(msa_dim, c_m) + + # RPE stuff + self.relpos_k = relpos_k + self.no_bins = 2 * relpos_k + 1 + self.linear_relpos = Linear(self.no_bins, c_z) + + def relpos(self, ri: torch.Tensor): + """ + Computes relative positional encodings + + Implements Algorithm 4. + + Args: + ri: + "residue_index" features of shape [*, N] + """ + d = ri[..., None] - ri[..., None, :] + boundaries = torch.arange( + start=-self.relpos_k, end=self.relpos_k + 1, device=d.device + ) + oh = one_hot(d, boundaries).type(ri.dtype) + return self.linear_relpos(oh) + + def forward( + self, + tf: torch.Tensor, + ri: torch.Tensor, + msa: torch.Tensor, + ) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Args: + tf: + "target_feat" features of shape [*, N_res, tf_dim] + ri: + "residue_index" features of shape [*, N_res] + msa: + "msa_feat" features of shape [*, N_clust, N_res, msa_dim] + Returns: + msa_emb: + [*, N_clust, N_res, C_m] MSA embedding + pair_emb: + [*, N_res, N_res, C_z] pair embedding + + """ + # [*, N_res, c_z] + tf_emb_i = self.linear_tf_z_i(tf) + tf_emb_j = self.linear_tf_z_j(tf) + + # [*, N_res, N_res, c_z] + pair_emb = tf_emb_i[..., None, :] + tf_emb_j[..., None, :, :] + pair_emb = pair_emb + self.relpos(ri.type(pair_emb.dtype)) + + # [*, N_clust, N_res, c_m] + n_clust = msa.shape[-3] + tf_m = ( + self.linear_tf_m(tf) + .unsqueeze(-3) + 
class RecyclingEmbedder(nn.Module):
    """
    Embeds the output of an iteration of the model for recycling.

    Implements Algorithm 32.
    """

    def __init__(
        self,
        c_m: int,
        c_z: int,
        min_bin: float,
        max_bin: float,
        no_bins: int,
        inf: float = 1e8,
        **kwargs,
    ):
        """
        Args:
            c_m:
                MSA channel dimension
            c_z:
                Pair embedding channel dimension
            min_bin:
                Smallest distogram bin (Angstroms)
            max_bin:
                Largest distogram bin (Angstroms)
            no_bins:
                Number of distogram bins
            inf:
                Upper edge used for the last distogram bin
        """
        super(RecyclingEmbedder, self).__init__()

        self.c_m = c_m
        self.c_z = c_z
        self.min_bin = min_bin
        self.max_bin = max_bin
        self.no_bins = no_bins
        self.inf = inf

        # Lazily created in forward(), matching the dtype/device of its input.
        self.bins = None

        self.linear = Linear(self.no_bins, self.c_z)
        self.layer_norm_m = LayerNorm(self.c_m)
        self.layer_norm_z = LayerNorm(self.c_z)

    def forward(
        self,
        m: torch.Tensor,
        z: torch.Tensor,
        x: torch.Tensor,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Args:
            m:
                First row of the MSA embedding. [*, N_res, C_m]
            z:
                [*, N_res, N_res, C_z] pair embedding
            x:
                [*, N_res, 3] predicted C_beta coordinates
        Returns:
            m:
                [*, N_res, C_m] MSA embedding update
            z:
                [*, N_res, N_res, C_z] pair embedding update
        """
        # ``bins`` is a plain attribute, not a registered buffer, so it does
        # not follow ``module.to(...)``.  Rebuild it whenever the cached copy
        # no longer matches the input; the original cached the first one
        # forever and then hit dtype/device mismatches.
        bins = self.bins
        if bins is None or bins.dtype != x.dtype or bins.device != x.device:
            bins = torch.linspace(
                self.min_bin,
                self.max_bin,
                self.no_bins,
                dtype=x.dtype,
                device=x.device,
                requires_grad=False,
            )
            self.bins = bins

        # [*, N, C_m]
        m_update = self.layer_norm_m(m)

        # This squared method might become problematic in FP16 mode.
        # I'm using it because my homegrown method had a stubborn discrepancy I
        # couldn't find in time.
        squared_bins = bins ** 2
        upper = torch.cat(
            [squared_bins[1:], squared_bins.new_tensor([self.inf])], dim=-1
        )
        d = torch.sum(
            (x[..., None, :] - x[..., None, :, :]) ** 2, dim=-1, keepdim=True
        )

        # [*, N, N, no_bins]
        d = ((d > squared_bins) * (d < upper)).type(x.dtype)

        # [*, N, N, C_z]
        d = self.linear(d)
        z_update = d + self.layer_norm_z(z)

        return m_update, z_update
+ + Implements Algorithm 2, line 15 + """ + + def __init__( + self, + c_in: int, + c_out: int, + **kwargs, + ): + """ + Args: + c_in: + Input channel dimension + c_out: + Output channel dimension + """ + super(ExtraMSAEmbedder, self).__init__() + + self.c_in = c_in + self.c_out = c_out + + self.linear = Linear(self.c_in, self.c_out) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Args: + x: + [*, N_extra_seq, N_res, C_in] "extra_msa_feat" features + Returns: + [*, N_extra_seq, N_res, C_out] embedding + """ + x = self.linear(x) + + return x diff --git a/openfold/model/evoformer.py b/openfold/model/evoformer.py new file mode 100644 index 0000000000000000000000000000000000000000..e0a862097a40dcb81514e3522494a66ea5078b01 --- /dev/null +++ b/openfold/model/evoformer.py @@ -0,0 +1,630 @@ +# Copyright 2021 AlQuraishi Laboratory +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class MSATransition(nn.Module):
    """
    Position-wise feed-forward block for MSA activations.

    Implements Algorithm 9
    """
    def __init__(self, c_m, n):
        """
        Args:
            c_m:
                MSA channel dimension
            n:
                Expansion factor; the hidden layer has n * c_m channels
        """
        super().__init__()

        self.c_m = c_m
        self.n = n

        hidden_dim = self.n * self.c_m
        self.layer_norm = LayerNorm(self.c_m)
        self.linear_1 = Linear(self.c_m, hidden_dim, init="relu")
        self.relu = nn.ReLU()
        self.linear_2 = Linear(hidden_dim, self.c_m, init="final")

    def _transition(self, m, mask):
        # Linear -> ReLU -> Linear, then zero out masked positions.
        hidden = self.relu(self.linear_1(m))
        return self.linear_2(hidden) * mask

    @torch.jit.ignore
    def _chunk(self,
        m: torch.Tensor,
        mask: torch.Tensor,
        chunk_size: int,
    ) -> torch.Tensor:
        # Same computation as _transition, evaluated through chunk_layer.
        inputs = {"m": m, "mask": mask}
        n_batch_dims = len(m.shape[:-2])
        return chunk_layer(
            self._transition,
            inputs,
            chunk_size=chunk_size,
            no_batch_dims=n_batch_dims,
        )

    def forward(
        self,
        m: torch.Tensor,
        mask: Optional[torch.Tensor] = None,
        chunk_size: Optional[int] = None,
    ) -> torch.Tensor:
        """
        Args:
            m:
                [*, N_seq, N_res, C_m] MSA activation
            mask:
                [*, N_seq, N_res] MSA mask; defaults to all ones. A trailing
                channel axis is added here before it is applied.
            chunk_size:
                If given, the transition is evaluated through chunk_layer
        Returns:
            m:
                [*, N_seq, N_res, C_m] MSA activation update
        """
        # DISCREPANCY: DeepMind forgets to apply the MSA mask here.
        if mask is None:
            mask = m.new_ones(m.shape[:-1])
        mask = mask.unsqueeze(-1)

        m = self.layer_norm(m)

        if chunk_size is None:
            return self._transition(m, mask)
        return self._chunk(m, mask, chunk_size)
+ msa_trans_mask = msa_mask if _mask_trans else None + pair_trans_mask = pair_mask if _mask_trans else None + + m = m + self.msa_transition( + m, mask=msa_trans_mask, chunk_size=chunk_size + ) + z = z + self.outer_product_mean( + m, mask=msa_mask, chunk_size=chunk_size + ) + z = z + self.ps_dropout_row_layer(self.tri_mul_out(z, mask=pair_mask)) + z = z + self.ps_dropout_row_layer(self.tri_mul_in(z, mask=pair_mask)) + z = z + self.ps_dropout_row_layer( + self.tri_att_start(z, mask=pair_mask, chunk_size=chunk_size) + ) + z = z + self.ps_dropout_col_layer( + self.tri_att_end(z, mask=pair_mask, chunk_size=chunk_size) + ) + z = z + self.pair_transition( + z, mask=pair_trans_mask, chunk_size=chunk_size + ) + + return m, z + + +class EvoformerBlock(nn.Module): + def __init__(self, + c_m: int, + c_z: int, + c_hidden_msa_att: int, + c_hidden_opm: int, + c_hidden_mul: int, + c_hidden_pair_att: int, + no_heads_msa: int, + no_heads_pair: int, + transition_n: int, + msa_dropout: float, + pair_dropout: float, + inf: float, + eps: float, + ): + super(EvoformerBlock, self).__init__() + + self.msa_att_row = MSARowAttentionWithPairBias( + c_m=c_m, + c_z=c_z, + c_hidden=c_hidden_msa_att, + no_heads=no_heads_msa, + inf=inf, + ) + + self.msa_att_col = MSAColumnAttention( + c_m, + c_hidden_msa_att, + no_heads_msa, + inf=inf, + ) + + self.msa_dropout_layer = DropoutRowwise(msa_dropout) + + self.core = EvoformerBlockCore( + c_m=c_m, + c_z=c_z, + c_hidden_opm=c_hidden_opm, + c_hidden_mul=c_hidden_mul, + c_hidden_pair_att=c_hidden_pair_att, + no_heads_msa=no_heads_msa, + no_heads_pair=no_heads_pair, + transition_n=transition_n, + pair_dropout=pair_dropout, + inf=inf, + eps=eps, + ) + + def forward(self, + m: torch.Tensor, + z: torch.Tensor, + msa_mask: torch.Tensor, + pair_mask: torch.Tensor, + chunk_size: Optional[int] = None, + _mask_trans: bool = True, + ) -> Tuple[torch.Tensor, torch.Tensor]: + m = m + self.msa_dropout_layer( + self.msa_att_row(m, z=z, mask=msa_mask, 
chunk_size=chunk_size) + ) + m = m + self.msa_att_col(m, mask=msa_mask, chunk_size=chunk_size) + m, z = self.core( + m, + z, + msa_mask=msa_mask, + pair_mask=pair_mask, + chunk_size=chunk_size, + _mask_trans=_mask_trans, + ) + + return m, z + + +class ExtraMSABlock(nn.Module): + """ + Almost identical to the standard EvoformerBlock, except in that the + ExtraMSABlock uses GlobalAttention for MSA column attention and + requires more fine-grained control over checkpointing. Separated from + its twin to preserve the TorchScript-ability of the latter. + """ + def __init__(self, + c_m: int, + c_z: int, + c_hidden_msa_att: int, + c_hidden_opm: int, + c_hidden_mul: int, + c_hidden_pair_att: int, + no_heads_msa: int, + no_heads_pair: int, + transition_n: int, + msa_dropout: float, + pair_dropout: float, + inf: float, + eps: float, + ckpt: bool, + ): + super(ExtraMSABlock, self).__init__() + + self.ckpt = ckpt + + self.msa_att_row = MSARowAttentionWithPairBias( + c_m=c_m, + c_z=c_z, + c_hidden=c_hidden_msa_att, + no_heads=no_heads_msa, + inf=inf, + ) + + self.msa_att_col = MSAColumnGlobalAttention( + c_in=c_m, + c_hidden=c_hidden_msa_att, + no_heads=no_heads_msa, + inf=inf, + eps=eps, + ) + + self.msa_dropout_layer = DropoutRowwise(msa_dropout) + + self.core = EvoformerBlockCore( + c_m=c_m, + c_z=c_z, + c_hidden_opm=c_hidden_opm, + c_hidden_mul=c_hidden_mul, + c_hidden_pair_att=c_hidden_pair_att, + no_heads_msa=no_heads_msa, + no_heads_pair=no_heads_pair, + transition_n=transition_n, + pair_dropout=pair_dropout, + inf=inf, + eps=eps, + ) + + def forward(self, + m: torch.Tensor, + z: torch.Tensor, + msa_mask: torch.Tensor, + pair_mask: torch.Tensor, + chunk_size: Optional[int] = None, + _chunk_logits: Optional[int] = 1024, + ) -> Tuple[torch.Tensor, torch.Tensor]: + def add(m1, m2): + # The first operation in a checkpoint can't be in-place, but it's + # nice to have in-place addition during inference. Thus... 
+ if(torch.is_grad_enabled()): + m1 = m1 + m2 + else: + m1 += m2 + + return m1 + + m = add(m, self.msa_dropout_layer( + self.msa_att_row( + m.clone() if torch.is_grad_enabled() else m, + z=z.clone() if torch.is_grad_enabled() else z, + mask=msa_mask, + chunk_size=chunk_size, + _chunk_logits=_chunk_logits if torch.is_grad_enabled() else None, + _checkpoint_chunks= + self.ckpt if torch.is_grad_enabled() else False, + ) + )) + + def fn(m, z): + m = add(m, self.msa_att_col(m, mask=msa_mask, chunk_size=chunk_size)) + m, z = self.core( + m, z, msa_mask=msa_mask, pair_mask=pair_mask, chunk_size=chunk_size + ) + + return m, z + + if(torch.is_grad_enabled() and self.ckpt): + checkpoint_fn = get_checkpoint_fn() + m, z = checkpoint_fn(fn, m, z) + else: + m, z = fn(m, z) + + return m, z + + +class EvoformerStack(nn.Module): + """ + Main Evoformer trunk. + + Implements Algorithm 6. + """ + + def __init__( + self, + c_m: int, + c_z: int, + c_hidden_msa_att: int, + c_hidden_opm: int, + c_hidden_mul: int, + c_hidden_pair_att: int, + c_s: int, + no_heads_msa: int, + no_heads_pair: int, + no_blocks: int, + transition_n: int, + msa_dropout: float, + pair_dropout: float, + blocks_per_ckpt: int, + inf: float, + eps: float, + clear_cache_between_blocks: bool = False, + **kwargs, + ): + """ + Args: + c_m: + MSA channel dimension + c_z: + Pair channel dimension + c_hidden_msa_att: + Hidden dimension in MSA attention + c_hidden_opm: + Hidden dimension in outer product mean module + c_hidden_mul: + Hidden dimension in multiplicative updates + c_hidden_pair_att: + Hidden dimension in triangular attention + c_s: + Channel dimension of the output "single" embedding + no_heads_msa: + Number of heads used for MSA attention + no_heads_pair: + Number of heads used for pair attention + no_blocks: + Number of Evoformer blocks in the stack + transition_n: + Factor by which to multiply c_m to obtain the MSATransition + hidden dimension + msa_dropout: + Dropout rate for MSA activations + pair_dropout: 
+ Dropout used for pair activations + blocks_per_ckpt: + Number of Evoformer blocks in each activation checkpoint + clear_cache_between_blocks: + Whether to clear CUDA's GPU memory cache between blocks of the + stack. Slows down each block but can reduce fragmentation + """ + super(EvoformerStack, self).__init__() + + self.blocks_per_ckpt = blocks_per_ckpt + self.clear_cache_between_blocks = clear_cache_between_blocks + + self.blocks = nn.ModuleList() + + for _ in range(no_blocks): + block = EvoformerBlock( + c_m=c_m, + c_z=c_z, + c_hidden_msa_att=c_hidden_msa_att, + c_hidden_opm=c_hidden_opm, + c_hidden_mul=c_hidden_mul, + c_hidden_pair_att=c_hidden_pair_att, + no_heads_msa=no_heads_msa, + no_heads_pair=no_heads_pair, + transition_n=transition_n, + msa_dropout=msa_dropout, + pair_dropout=pair_dropout, + inf=inf, + eps=eps, + ) + self.blocks.append(block) + + self.linear = Linear(c_m, c_s) + + def forward(self, + m: torch.Tensor, + z: torch.Tensor, + msa_mask: torch.Tensor, + pair_mask: torch.Tensor, + chunk_size: int, + _mask_trans: bool = True, + ) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: + """ + Args: + m: + [*, N_seq, N_res, C_m] MSA embedding + z: + [*, N_res, N_res, C_z] pair embedding + msa_mask: + [*, N_seq, N_res] MSA mask + pair_mask: + [*, N_res, N_res] pair mask + Returns: + m: + [*, N_seq, N_res, C_m] MSA embedding + z: + [*, N_res, N_res, C_z] pair embedding + s: + [*, N_res, C_s] single embedding (or None if extra MSA stack) + """ + blocks = [ + partial( + b, + msa_mask=msa_mask, + pair_mask=pair_mask, + chunk_size=chunk_size, + _mask_trans=_mask_trans, + ) + for b in self.blocks + ] + + if(self.clear_cache_between_blocks): + def block_with_cache_clear(block, *args): + torch.cuda.empty_cache() + return block(*args) + + blocks = [partial(block_with_cache_clear, b) for b in blocks] + + m, z = checkpoint_blocks( + blocks, + args=(m, z), + blocks_per_ckpt=self.blocks_per_ckpt if self.training else None, + ) + + s = self.linear(m[..., 
0, :, :]) + + return m, z, s + + +class ExtraMSAStack(nn.Module): + """ + Implements Algorithm 18. + """ + + def __init__(self, + c_m: int, + c_z: int, + c_hidden_msa_att: int, + c_hidden_opm: int, + c_hidden_mul: int, + c_hidden_pair_att: int, + no_heads_msa: int, + no_heads_pair: int, + no_blocks: int, + transition_n: int, + msa_dropout: float, + pair_dropout: float, + inf: float, + eps: float, + ckpt: bool, + clear_cache_between_blocks: bool = False, + **kwargs, + ): + super(ExtraMSAStack, self).__init__() + + self.clear_cache_between_blocks = clear_cache_between_blocks + self.blocks = nn.ModuleList() + for _ in range(no_blocks): + block = ExtraMSABlock( + c_m=c_m, + c_z=c_z, + c_hidden_msa_att=c_hidden_msa_att, + c_hidden_opm=c_hidden_opm, + c_hidden_mul=c_hidden_mul, + c_hidden_pair_att=c_hidden_pair_att, + no_heads_msa=no_heads_msa, + no_heads_pair=no_heads_pair, + transition_n=transition_n, + msa_dropout=msa_dropout, + pair_dropout=pair_dropout, + inf=inf, + eps=eps, + ckpt=ckpt, + ) + self.blocks.append(block) + + def forward(self, + m: torch.Tensor, + z: torch.Tensor, + chunk_size: int, + msa_mask: Optional[torch.Tensor] = None, + pair_mask: Optional[torch.Tensor] = None, + _mask_trans: bool = True, + ) -> torch.Tensor: + """ + Args: + m: + [*, N_extra, N_res, C_m] extra MSA embedding + z: + [*, N_res, N_res, C_z] pair embedding + msa_mask: + Optional [*, N_extra, N_res] MSA mask + pair_mask: + Optional [*, N_res, N_res] pair mask + Returns: + [*, N_res, N_res, C_z] pair update + """ + #checkpoint_fn = get_checkpoint_fn() + #blocks = [ + # partial(b, msa_mask=msa_mask, pair_mask=pair_mask, chunk_size=chunk_size, _chunk_logits=None) for b in self.blocks + #] + + #def dodo(b, *args): + # torch.cuda.empty_cache() + # return b(*args) + + #blocks = [partial(dodo, b) for b in blocks] + + #for b in blocks: + # if(torch.is_grad_enabled()): + # m, z = checkpoint_fn(b, *(m, z)) + # else: + # m, z = b(m, z) + + for b in self.blocks: + m, z = b(m, z, msa_mask, 
pair_mask, chunk_size=chunk_size) + + if(self.clear_cache_between_blocks): + torch.cuda.empty_cache() + + return z diff --git a/openfold/model/heads.py b/openfold/model/heads.py new file mode 100644 index 0000000000000000000000000000000000000000..f46126d741e79d177a80220f89e1523a408e569b --- /dev/null +++ b/openfold/model/heads.py @@ -0,0 +1,251 @@ +# Copyright 2021 AlQuraishi Laboratory +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn as nn + +from openfold.model.primitives import Linear, LayerNorm +from openfold.utils.loss import ( + compute_plddt, + compute_tm, + compute_predicted_aligned_error, +) + + +class AuxiliaryHeads(nn.Module): + def __init__(self, config): + super(AuxiliaryHeads, self).__init__() + + self.plddt = PerResidueLDDTCaPredictor( + **config["lddt"], + ) + + self.distogram = DistogramHead( + **config["distogram"], + ) + + self.masked_msa = MaskedMSAHead( + **config["masked_msa"], + ) + + self.experimentally_resolved = ExperimentallyResolvedHead( + **config["experimentally_resolved"], + ) + + if config.tm.enabled: + self.tm = TMScoreHead( + **config.tm, + ) + + self.config = config + + def forward(self, outputs): + aux_out = {} + lddt_logits = self.plddt(outputs["sm"]["single"]) + aux_out["lddt_logits"] = lddt_logits + + # Required for relaxation later on + aux_out["plddt"] = compute_plddt(lddt_logits) + + distogram_logits = self.distogram(outputs["pair"]) + 
aux_out["distogram_logits"] = distogram_logits + + masked_msa_logits = self.masked_msa(outputs["msa"]) + aux_out["masked_msa_logits"] = masked_msa_logits + + experimentally_resolved_logits = self.experimentally_resolved( + outputs["single"] + ) + aux_out[ + "experimentally_resolved_logits" + ] = experimentally_resolved_logits + + if self.config.tm.enabled: + tm_logits = self.tm(outputs["pair"]) + aux_out["tm_logits"] = tm_logits + aux_out["predicted_tm_score"] = compute_tm( + tm_logits, **self.config.tm + ) + aux_out.update( + compute_predicted_aligned_error( + tm_logits, + **self.config.tm, + ) + ) + + return aux_out + + +class PerResidueLDDTCaPredictor(nn.Module): + def __init__(self, no_bins, c_in, c_hidden): + super(PerResidueLDDTCaPredictor, self).__init__() + + self.no_bins = no_bins + self.c_in = c_in + self.c_hidden = c_hidden + + self.layer_norm = LayerNorm(self.c_in) + + self.linear_1 = Linear(self.c_in, self.c_hidden, init="relu") + self.linear_2 = Linear(self.c_hidden, self.c_hidden, init="relu") + self.linear_3 = Linear(self.c_hidden, self.no_bins, init="final") + + self.relu = nn.ReLU() + + def forward(self, s): + s = self.layer_norm(s) + s = self.linear_1(s) + s = self.relu(s) + s = self.linear_2(s) + s = self.relu(s) + s = self.linear_3(s) + + return s + + +class DistogramHead(nn.Module): + """ + Computes a distogram probability distribution. 
+ + For use in computation of distogram loss, subsection 1.9.8 + """ + + def __init__(self, c_z, no_bins, **kwargs): + """ + Args: + c_z: + Input channel dimension + no_bins: + Number of distogram bins + """ + super(DistogramHead, self).__init__() + + self.c_z = c_z + self.no_bins = no_bins + + self.linear = Linear(self.c_z, self.no_bins, init="final") + + def forward(self, z): # [*, N, N, C_z] + """ + Args: + z: + [*, N_res, N_res, C_z] pair embedding + Returns: + [*, N, N, no_bins] distogram probability distribution + """ + # [*, N, N, no_bins] + logits = self.linear(z) + logits = logits + logits.transpose(-2, -3) + return logits + + +class TMScoreHead(nn.Module): + """ + For use in computation of TM-score, subsection 1.9.7 + """ + + def __init__(self, c_z, no_bins, **kwargs): + """ + Args: + c_z: + Input channel dimension + no_bins: + Number of bins + """ + super(TMScoreHead, self).__init__() + + self.c_z = c_z + self.no_bins = no_bins + + self.linear = Linear(self.c_z, self.no_bins, init="final") + + def forward(self, z): + """ + Args: + z: + [*, N_res, N_res, C_z] pairwise embedding + Returns: + [*, N_res, N_res, no_bins] prediction + """ + # [*, N, N, no_bins] + logits = self.linear(z) + return logits + + +class MaskedMSAHead(nn.Module): + """ + For use in computation of masked MSA loss, subsection 1.9.9 + """ + + def __init__(self, c_m, c_out, **kwargs): + """ + Args: + c_m: + MSA channel dimension + c_out: + Output channel dimension + """ + super(MaskedMSAHead, self).__init__() + + self.c_m = c_m + self.c_out = c_out + + self.linear = Linear(self.c_m, self.c_out, init="final") + + def forward(self, m): + """ + Args: + m: + [*, N_seq, N_res, C_m] MSA embedding + Returns: + [*, N_seq, N_res, C_out] reconstruction + """ + # [*, N_seq, N_res, C_out] + logits = self.linear(m) + return logits + + +class ExperimentallyResolvedHead(nn.Module): + """ + For use in computation of "experimentally resolved" loss, subsection + 1.9.10 + """ + + def __init__(self, c_s, 
c_out, **kwargs): + """ + Args: + c_s: + Input channel dimension + c_out: + Number of distogram bins + """ + super(ExperimentallyResolvedHead, self).__init__() + + self.c_s = c_s + self.c_out = c_out + + self.linear = Linear(self.c_s, self.c_out, init="final") + + def forward(self, s): + """ + Args: + s: + [*, N_res, C_s] single embedding + Returns: + [*, N, C_out] logits + """ + # [*, N, C_out] + logits = self.linear(s) + return logits diff --git a/openfold/model/model.py b/openfold/model/model.py new file mode 100644 index 0000000000000000000000000000000000000000..007b90f7af7b7f5b379f5c18d89f0f69f398c494 --- /dev/null +++ b/openfold/model/model.py @@ -0,0 +1,446 @@ +# Copyright 2021 AlQuraishi Laboratory +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
from functools import partial
import torch
import torch.nn as nn

from openfold.utils.feats import (
    pseudo_beta_fn,
    build_extra_msa_feat,
    build_template_angle_feat,
    build_template_pair_feat,
    atom14_to_atom37,
)
from openfold.model.embedders import (
    InputEmbedder,
    RecyclingEmbedder,
    TemplateAngleEmbedder,
    TemplatePairEmbedder,
    ExtraMSAEmbedder,
)
from openfold.model.evoformer import EvoformerStack, ExtraMSAStack
from openfold.model.heads import AuxiliaryHeads
import openfold.np.residue_constants as residue_constants
from openfold.model.structure_module import StructureModule
from openfold.model.template import (
    TemplatePairStack,
    TemplatePointwiseAttention,
)
from openfold.utils.loss import (
    compute_plddt,
)
from openfold.utils.tensor_utils import (
    dict_multimap,
    tensor_tree_map,
)


class AlphaFold(nn.Module):
    """
    Alphafold 2.

    Implements Algorithm 2 (but with training).
    """

    def __init__(self, config):
        """
        Args:
            config:
                A dict-like config object (like the one in config.py)
        """
        super(AlphaFold, self).__init__()

        self.globals = config.globals
        config = config.model
        template_config = config.template
        extra_msa_config = config.extra_msa

        # Main trunk + structure module
        self.input_embedder = InputEmbedder(
            **config["input_embedder"],
        )
        self.recycling_embedder = RecyclingEmbedder(
            **config["recycling_embedder"],
        )
        self.template_angle_embedder = TemplateAngleEmbedder(
            **template_config["template_angle_embedder"],
        )
        self.template_pair_embedder = TemplatePairEmbedder(
            **template_config["template_pair_embedder"],
        )
        self.template_pair_stack = TemplatePairStack(
            **template_config["template_pair_stack"],
        )
        self.template_pointwise_att = TemplatePointwiseAttention(
            **template_config["template_pointwise_attention"],
        )
        self.extra_msa_embedder = ExtraMSAEmbedder(
            **extra_msa_config["extra_msa_embedder"],
        )
        self.extra_msa_stack = ExtraMSAStack(
            **extra_msa_config["extra_msa_stack"],
        )
        self.evoformer = EvoformerStack(
            **config["evoformer_stack"],
        )
        self.structure_module = StructureModule(
            **config["structure_module"],
        )

        self.aux_heads = AuxiliaryHeads(
            config["heads"],
        )

        # From here on self.config is the "model" sub-config
        self.config = config

    def embed_templates(self, batch, z, pair_mask, templ_dim):
        """
        Embed the template features and attend over them.

        Args:
            batch:
                Dict of template features; must contain "template_aatype"
                and "template_mask"
            z:
                Pair embedding, used as the attention query and for dtype
            pair_mask:
                Pair mask
            templ_dim:
                Index of the template dimension in the feature tensors
        Returns:
            Dict with "template_pair_embedding" and, when
            config.template.embed_angles is set, "template_angle_embedding"
        """
        # Embed the templates one at a time (with a poor man's vmap)
        template_embeds = []
        n_templ = batch["template_aatype"].shape[templ_dim]
        for i in range(n_templ):
            idx = batch["template_aatype"].new_tensor(i)
            single_template_feats = tensor_tree_map(
                lambda t: torch.index_select(t, templ_dim, idx),
                batch,
            )

            single_template_embeds = {}
            if self.config.template.embed_angles:
                template_angle_feat = build_template_angle_feat(
                    single_template_feats,
                )

                # [*, S_t, N, C_m]
                a = self.template_angle_embedder(template_angle_feat)

                single_template_embeds["angle"] = a

            # [*, S_t, N, N, C_t]
            t = build_template_pair_feat(
                single_template_feats,
                inf=self.config.template.inf,
                eps=self.config.template.eps,
                **self.config.template.distogram,
            ).to(z.dtype)
            t = self.template_pair_embedder(t)

            single_template_embeds["pair"] = t

            template_embeds.append(single_template_embeds)

        # Concatenate the per-template embeddings along the template dim
        template_embeds = dict_multimap(
            partial(torch.cat, dim=templ_dim),
            template_embeds,
        )

        # [*, S_t, N, N, C_z]
        t = self.template_pair_stack(
            template_embeds["pair"],
            pair_mask.unsqueeze(-3).to(dtype=z.dtype),
            chunk_size=self.globals.chunk_size,
            _mask_trans=self.config._mask_trans,
        )

        # [*, N, N, C_z]
        t = self.template_pointwise_att(
            t,
            z,
            template_mask=batch["template_mask"].to(dtype=z.dtype),
            chunk_size=self.globals.chunk_size,
        )
        # Zero the template contribution when the template mask sums to zero
        t = t * (torch.sum(batch["template_mask"]) > 0)

        ret = {}
        if self.config.template.embed_angles:
            ret["template_angle_embedding"] = template_embeds["angle"]

        ret["template_pair_embedding"] = t

        return ret

    def iteration(self, feats, m_1_prev, z_prev, x_prev, _recycle=True):
        """
        Run one recycling iteration of the model.

        Args:
            feats:
                Feature dict for the current cycle. float32 entries are cast
                in place to the dtype of the model parameters.
            m_1_prev, z_prev, x_prev:
                Recycled first-row MSA embedding, pair embedding and atom
                positions from the previous iteration. If any is None, all
                three are re-initialized to zeros.
            _recycle:
                If False, the recycling embeddings are multiplied by zero
        Returns:
            (outputs, m_1_prev, z_prev, x_prev), where the last three are
            the values to recycle into the next iteration
        """
        # Primary output dictionary
        outputs = {}

        # This needs to be done manually for DeepSpeed's sake
        dtype = next(self.parameters()).dtype
        for k in feats:
            if feats[k].dtype == torch.float32:
                feats[k] = feats[k].to(dtype=dtype)

        # Grab some data about the input
        batch_dims = feats["target_feat"].shape[:-2]
        no_batch_dims = len(batch_dims)
        n = feats["target_feat"].shape[-2]
        n_seq = feats["msa_feat"].shape[-3]

        # Prep some features
        seq_mask = feats["seq_mask"]
        pair_mask = seq_mask[..., None] * seq_mask[..., None, :]
        msa_mask = feats["msa_mask"]

        # Initialize the MSA and pair representations

        # m: [*, S_c, N, C_m]
        # z: [*, N, N, C_z]
        m, z = self.input_embedder(
            feats["target_feat"],
            feats["residue_index"],
            feats["msa_feat"],
        )

        # Initialize the recycling embeddings, if needs be. Identity checks
        # are used on purpose: "None in [...]" would fall back to "==" on
        # tensors.
        if m_1_prev is None or z_prev is None or x_prev is None:
            # [*, N, C_m]
            m_1_prev = m.new_zeros(
                (*batch_dims, n, self.config.input_embedder.c_m),
                requires_grad=False,
            )

            # [*, N, N, C_z]
            z_prev = z.new_zeros(
                (*batch_dims, n, n, self.config.input_embedder.c_z),
                requires_grad=False,
            )

            # [*, N, atom_type_num, 3]
            x_prev = z.new_zeros(
                (*batch_dims, n, residue_constants.atom_type_num, 3),
                requires_grad=False,
            )

        # Applied on every iteration: recycled x_prev holds the previous
        # iteration's final atom positions
        x_prev = pseudo_beta_fn(
            feats["aatype"], x_prev, None
        ).to(dtype=z.dtype)

        # m_1_prev_emb: [*, N, C_m]
        # z_prev_emb: [*, N, N, C_z]
        m_1_prev_emb, z_prev_emb = self.recycling_embedder(
            m_1_prev,
            z_prev,
            x_prev,
        )

        # If the number of recycling iterations is 0, skip recycling
        # altogether. We zero them this way instead of computing them
        # conditionally to avoid leaving parameters unused, which has annoying
        # implications for DDP training.
        if not _recycle:
            m_1_prev_emb *= 0
            z_prev_emb *= 0

        # [*, S_c, N, C_m]
        m[..., 0, :, :] += m_1_prev_emb

        # [*, N, N, C_z]
        z += z_prev_emb

        # Possibly prevents memory fragmentation
        del m_1_prev, z_prev, x_prev, m_1_prev_emb, z_prev_emb

        # Embed the templates + merge with MSA/pair embeddings
        if self.config.template.enabled:
            template_feats = {
                k: v for k, v in feats.items() if k.startswith("template_")
            }
            template_embeds = self.embed_templates(
                template_feats,
                z,
                pair_mask.to(dtype=z.dtype),
                no_batch_dims,
            )

            # [*, N, N, C_z]
            z = z + template_embeds["template_pair_embedding"]

            if self.config.template.embed_angles:
                # [*, S = S_c + S_t, N, C_m]
                m = torch.cat(
                    [m, template_embeds["template_angle_embedding"]],
                    dim=-3
                )

                # [*, S, N]
                torsion_angles_mask = feats["template_torsion_angles_mask"]
                msa_mask = torch.cat(
                    [feats["msa_mask"], torsion_angles_mask[..., 2]],
                    dim=-2
                )

        # Embed extra MSA features + merge with pairwise embeddings
        if self.config.extra_msa.enabled:
            # [*, S_e, N, C_e]
            a = self.extra_msa_embedder(build_extra_msa_feat(feats))

            # [*, N, N, C_z]
            z = self.extra_msa_stack(
                a,
                z,
                msa_mask=feats["extra_msa_mask"].to(dtype=a.dtype),
                chunk_size=self.globals.chunk_size,
                pair_mask=pair_mask.to(dtype=z.dtype),
                _mask_trans=self.config._mask_trans,
            )

        # Run MSA + pair embeddings through the trunk of the network
        # m: [*, S, N, C_m]
        # z: [*, N, N, C_z]
        # s: [*, N, C_s]
        m, z, s = self.evoformer(
            m,
            z,
            msa_mask=msa_mask.to(dtype=m.dtype),
            pair_mask=pair_mask.to(dtype=z.dtype),
            chunk_size=self.globals.chunk_size,
            _mask_trans=self.config._mask_trans,
        )

        # Drop any rows appended after the original n_seq MSA rows
        outputs["msa"] = m[..., :n_seq, :, :]
        outputs["pair"] = z
        outputs["single"] = s

        # Predict 3D structure
        outputs["sm"] = self.structure_module(
            s,
            z,
            feats["aatype"],
            mask=feats["seq_mask"].to(dtype=s.dtype),
        )
        outputs["final_atom_positions"] = atom14_to_atom37(
            outputs["sm"]["positions"][-1], feats
        )
        outputs["final_atom_mask"] = feats["atom37_atom_exists"]
        outputs["final_affine_tensor"] = outputs["sm"]["frames"][-1]

        # Save embeddings for use during the next recycling iteration

        # [*, N, C_m]
        m_1_prev = m[..., 0, :, :]

        # [*, N, N, C_z]
        z_prev = z

        # Final atom positions; reduced to pseudo-beta positions at the
        # start of the next iteration
        x_prev = outputs["final_atom_positions"]

        return outputs, m_1_prev, z_prev, x_prev

    def _disable_activation_checkpointing(self):
        """Turn activation checkpointing off in all three stacks."""
        self.template_pair_stack.blocks_per_ckpt = None
        self.evoformer.blocks_per_ckpt = None

        for b in self.extra_msa_stack.blocks:
            b.ckpt = False

    def _enable_activation_checkpointing(self):
        """Restore the checkpointing settings stored in the model config."""
        self.template_pair_stack.blocks_per_ckpt = (
            self.config.template.template_pair_stack.blocks_per_ckpt
        )
        self.evoformer.blocks_per_ckpt = (
            self.config.evoformer_stack.blocks_per_ckpt
        )

        for b in self.extra_msa_stack.blocks:
            b.ckpt = self.config.extra_msa.extra_msa_stack.ckpt

    def forward(self, batch):
        """
        Args:
            batch:
                Dictionary of arguments outlined in Algorithm 2. Keys must
                include the official names of the features in the
                supplement subsection 1.2.9.

                The final dimension of each input must have length equal to
                the number of recycling iterations.

                Features (without the recycling dimension):

                    "aatype" ([*, N_res]):
                        Contrary to the supplement, this tensor of residue
                        indices is not one-hot.
                    "target_feat" ([*, N_res, C_tf])
                        One-hot encoding of the target sequence. C_tf is
                        config.model.input_embedder.tf_dim.
                    "residue_index" ([*, N_res])
                        Tensor whose final dimension consists of
                        consecutive indices from 0 to N_res.
                    "msa_feat" ([*, N_seq, N_res, C_msa])
                        MSA features, constructed as in the supplement.
                        C_msa is config.model.input_embedder.msa_dim.
                    "seq_mask" ([*, N_res])
                        1-D sequence mask
                    "msa_mask" ([*, N_seq, N_res])
                        MSA mask
                    "pair_mask" ([*, N_res, N_res])
                        2-D pair mask
                    "extra_msa_mask" ([*, N_extra, N_res])
                        Extra MSA mask
                    "template_mask" ([*, N_templ])
                        Template mask (on the level of templates, not
                        residues)
                    "template_aatype" ([*, N_templ, N_res])
                        Tensor of template residue indices (indices greater
                        than 19 are clamped to 20 (Unknown))
                    "template_all_atom_positions"
                        ([*, N_templ, N_res, 37, 3])
                        Template atom coordinates in atom37 format
                    "template_all_atom_mask" ([*, N_templ, N_res, 37])
                        Template atom coordinate mask
                    "template_pseudo_beta" ([*, N_templ, N_res, 3])
                        Positions of template carbon "pseudo-beta" atoms
                        (i.e. C_beta for all residues but glycine, for
                        which C_alpha is used instead)
                    "template_pseudo_beta_mask" ([*, N_templ, N_res])
                        Pseudo-beta mask
        Returns:
            The output dict of the final recycling iteration, updated with
            the outputs of the auxiliary heads
        """
        # Initialize recycling embeddings
        m_1_prev, z_prev, x_prev = None, None, None

        # Disable activation checkpointing for the first few recycling iters
        is_grad_enabled = torch.is_grad_enabled()
        self._disable_activation_checkpointing()

        try:
            # Main recycling loop
            num_iters = batch["aatype"].shape[-1]
            for cycle_no in range(num_iters):
                # Select the features for the current recycling cycle
                feats = tensor_tree_map(lambda t: t[..., cycle_no], batch)

                # Enable grad iff we're training and it's the final
                # recycling layer
                is_final_iter = cycle_no == (num_iters - 1)
                with torch.set_grad_enabled(is_grad_enabled and is_final_iter):
                    if is_final_iter:
                        self._enable_activation_checkpointing()
                        # Sidestep AMP bug (PyTorch issue #65766)
                        if torch.is_autocast_enabled():
                            torch.clear_autocast_cache()

                    # Run the next iteration of the model
                    outputs, m_1_prev, z_prev, x_prev = self.iteration(
                        feats,
                        m_1_prev,
                        z_prev,
                        x_prev,
                        _recycle=(num_iters > 1)
                    )
        finally:
            # Never leave checkpointing switched off if an iteration raised
            # before the final cycle re-enabled it
            self._enable_activation_checkpointing()

        # Run auxiliary heads
        outputs.update(self.aux_heads(outputs))

        return outputs


# The bare token below is the first word of the next patch header
# ("diff --git a/openfold/model/msa.py ..."), which continues on the
# following line of the dump; it is kept so that header stays intact.
diff
class MSAAttention(nn.Module):
    """
    Gated multi-head self-attention over an MSA embedding, with an optional
    additive bias projected from the pair embedding.

    Attention is computed independently for every index of the N_seq axis,
    mixing information along the N_res axis. Masked positions receive a
    large negative bias (``-inf``) before the softmax.
    """

    def __init__(
        self,
        c_in,
        c_hidden,
        no_heads,
        pair_bias=False,
        c_z=None,
        inf=1e9,
    ):
        """
        Args:
            c_in:
                Input channel dimension
            c_hidden:
                Per-head hidden channel dimension
            no_heads:
                Number of attention heads
            pair_bias:
                Whether to use pair embedding bias
            c_z:
                Pair embedding channel dimension. Ignored unless pair_bias
                is true
            inf:
                A large number to be used in computing the attention mask
        """
        super(MSAAttention, self).__init__()

        self.c_in = c_in
        self.c_hidden = c_hidden
        self.no_heads = no_heads
        self.pair_bias = pair_bias
        self.c_z = c_z
        self.inf = inf

        self.layer_norm_m = LayerNorm(self.c_in)

        # The pair-bias branch is only materialised when requested; the
        # attributes always exist so that TorchScript can see them.
        self.layer_norm_z = None
        self.linear_z = None
        if self.pair_bias:
            self.layer_norm_z = LayerNorm(self.c_z)
            self.linear_z = Linear(
                self.c_z, self.no_heads, bias=False, init="normal"
            )

        self.mha = Attention(
            self.c_in, self.c_in, self.c_in, self.c_hidden, self.no_heads
        )

    @torch.jit.ignore
    def _chunk(self,
        m: torch.Tensor,
        biases: List[torch.Tensor],
        chunk_size: int,
    ) -> torch.Tensor:
        """
        Run ``self.mha`` through ``chunk_layer``, treating every dimension
        of ``m`` except the last two as a batch dimension.
        """
        return chunk_layer(
            self.mha,
            {"q_x": m, "kv_x": m, "biases": biases},
            chunk_size=chunk_size,
            no_batch_dims=len(m.shape[:-2]),
        )

    def _prep_inputs(self,
        m: torch.Tensor,
        z: Optional[torch.Tensor],
        mask: Optional[torch.Tensor]
    ) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]:
        """
        Normalise the MSA embedding and build the attention biases.

        Returns:
            A tuple ``(m, mask_bias, z)`` where ``m`` is the layer-normed
            input, ``mask_bias`` is ``[*, N_seq, 1, 1, N_res]`` and ``z`` is
            either the ``[*, 1, no_heads, N_res, N_res]`` pair bias or the
            value that was passed in (None when no pair embedding is
            supplied).
        """
        # [*, N_seq, N_res, C_m]
        m = self.layer_norm_m(m)

        n_seq, n_res = m.shape[-3:-1]
        if mask is None:
            # [*, N_seq, N_res]
            mask = m.new_ones(
                m.shape[:-3] + (n_seq, n_res),
            )

        # 0 where the mask is 1, -inf where it is 0. The bias is left
        # unexpanded and relies on broadcasting inside the attention.
        # [*, N_seq, 1, 1, N_res]
        mask_bias = (self.inf * (mask - 1))[..., :, None, None, :]

        if (self.pair_bias and
            z is not None and                 # For the
            self.layer_norm_z is not None and  # benefit of
            self.linear_z is not None          # TorchScript
        ):
            # [*, N_res, N_res, C_z]
            z = self.layer_norm_z(z)

            # [*, N_res, N_res, no_heads]
            z = self.linear_z(z)

            # [*, 1, no_heads, N_res, N_res]
            z = permute_final_dims(z, (2, 0, 1)).unsqueeze(-4)

        return m, mask_bias, z

    @torch.jit.ignore
    def _chunked_msa_attn(self,
        m: torch.Tensor,
        z: Optional[torch.Tensor],
        mask: Optional[torch.Tensor],
        chunk_logits: int,
        checkpoint: bool,
    ) -> torch.Tensor:
        """
        Memory-saving attention path: the logits are computed in chunks of
        ``chunk_logits`` along the MSA dimension, optionally with activation
        checkpointing of each stage.
        """
        MSA_DIM = -4
        checkpoint = bool(checkpoint)

        def _get_qkv(m, z):
            m, mask_bias, z = self._prep_inputs(m, z, mask)
            q, k, v = self.mha._prep_qkv(m, m)
            return m, q, k, v, mask_bias, z

        checkpoint_fn = get_checkpoint_fn()

        if torch.is_grad_enabled() and checkpoint:
            m, q, k, v, mask_bias, z = checkpoint_fn(_get_qkv, m, z)
        else:
            m, q, k, v, mask_bias, z = _get_qkv(m, z)

        # z is None when no pair bias is in use; it must not be handed on
        # as a bias because the chunked attention reads the shape of every
        # bias it receives.
        biases = [b for b in (mask_bias, z) if b is not None]

        o = _attention_chunked_trainable(
            query=q,
            key=k,
            value=v,
            biases=biases,
            chunk_size=chunk_logits,
            chunk_dim=MSA_DIM,
            checkpoint=checkpoint,
        )

        if torch.is_grad_enabled() and checkpoint:
            # Storing an additional m here is far from ideal
            m = checkpoint_fn(self.mha._wrap_up, o, m)
        else:
            m = self.mha._wrap_up(o, m)

        return m

    def forward(self,
        m: torch.Tensor,
        z: Optional[torch.Tensor] = None,
        mask: Optional[torch.Tensor] = None,
        chunk_size: Optional[int] = None,
        _chunk_logits: Optional[int] = None,
        _checkpoint_chunks: Optional[bool] = None,
    ) -> torch.Tensor:
        """
        Args:
            m:
                [*, N_seq, N_res, C_m] MSA embedding
            z:
                [*, N_res, N_res, C_z] pair embedding. Required only if
                pair_bias is True
            mask:
                [*, N_seq, N_res] MSA mask
            chunk_size:
                Size of chunks into which the inputs are split along their
                batch dimensions. A low value decreases memory overhead at
                the cost of slower execution. Chunking is not performed by
                default.
            _chunk_logits:
                If given, the chunked-logits attention path is used instead
                and chunk_size is ignored
            _checkpoint_chunks:
                Whether the chunked-logits path should use activation
                checkpointing. None is treated as False
        Returns:
            [*, N_seq, N_res, C_m] MSA embedding update
        """
        if _chunk_logits is not None:
            return self._chunked_msa_attn(
                m=m, z=z, mask=mask,
                chunk_logits=_chunk_logits,
                checkpoint=bool(_checkpoint_chunks),
            )

        m, mask_bias, z = self._prep_inputs(m, z, mask)

        biases = [mask_bias]
        if z is not None:
            biases.append(z)

        if chunk_size is not None:
            m = self._chunk(m, biases, chunk_size)
        else:
            m = self.mha(
                q_x=m,
                kv_x=m,
                biases=biases
            )

        return m
class MSAColumnGlobalAttention(nn.Module):
    """
    Column-wise global attention over an MSA embedding.

    The input is transposed so that the N_seq axis is the one attended
    over, layer-normalised, passed through GlobalAttention and transposed
    back to its original layout.
    """

    def __init__(
        self, c_in, c_hidden, no_heads, inf=1e9, eps=1e-10,
    ):
        """
        Args:
            c_in:
                MSA channel dimension
            c_hidden:
                Hidden channel dimension handed to GlobalAttention
            no_heads:
                Number of attention heads
            inf:
                Large number used to construct attention masks
            eps:
                Small constant handed to GlobalAttention
        """
        super(MSAColumnGlobalAttention, self).__init__()

        self.c_in = c_in
        self.c_hidden = c_hidden
        self.no_heads = no_heads
        self.inf = inf
        self.eps = eps

        self.layer_norm_m = nn.LayerNorm(c_in)

        self.global_attention = GlobalAttention(
            c_in=c_in,
            c_hidden=c_hidden,
            no_heads=no_heads,
            inf=inf,
            eps=eps,
        )

    @torch.jit.ignore
    def _chunk(self,
        m: torch.Tensor,
        mask: torch.Tensor,
        chunk_size: int,
    ) -> torch.Tensor:
        """
        Run the global attention through ``chunk_layer``, treating every
        dimension of ``m`` except the last two as a batch dimension.
        """
        mha_input = {
            "m": m,
            "mask": mask,
        }
        return chunk_layer(
            self.global_attention,
            mha_input,
            chunk_size=chunk_size,
            no_batch_dims=len(m.shape[:-2]),
        )

    def forward(
        self,
        m: torch.Tensor,
        mask: Optional[torch.Tensor] = None,
        chunk_size: Optional[int] = None,
    ) -> torch.Tensor:
        """
        Args:
            m:
                [*, N_seq, N_res, C_in] MSA embedding
            mask:
                [*, N_seq, N_res] MSA mask. Defaults to all ones
            chunk_size:
                If given, the attention is evaluated in chunks of this size
                over the batch dimensions
        Returns:
            [*, N_seq, N_res, C_in] MSA embedding update
        """
        if mask is None:
            # [*, N_seq, N_res]; same dtype/device as m, and a freshly
            # created tensor carries no autograd history to detach.
            mask = m.new_ones(m.shape[:-1])

        # [*, N_res, N_seq, C_in]
        m = m.transpose(-2, -3)
        mask = mask.transpose(-1, -2)

        # [*, N_res, N_seq, C_in]
        m = self.layer_norm_m(m)

        if chunk_size is not None:
            m = self._chunk(m, mask, chunk_size)
        else:
            m = self.global_attention(m=m, mask=mask)

        # [*, N_seq, N_res, C_in]
        m = m.transpose(-2, -3)

        return m
+ """ + + def __init__(self, c_m, c_z, c_hidden, eps=1e-3): + """ + Args: + c_m: + MSA embedding channel dimension + c_z: + Pair embedding channel dimension + c_hidden: + Hidden channel dimension + """ + super(OuterProductMean, self).__init__() + + self.c_m = c_m + self.c_z = c_z + self.c_hidden = c_hidden + self.eps = eps + + self.layer_norm = nn.LayerNorm(c_m) + self.linear_1 = Linear(c_m, c_hidden) + self.linear_2 = Linear(c_m, c_hidden) + self.linear_out = Linear(c_hidden ** 2, c_z, init="final") + + def _opm(self, a, b): + # [*, N_res, N_res, C, C] + outer = torch.einsum("...bac,...dae->...bdce", a, b) + + # [*, N_res, N_res, C * C] + outer = outer.reshape(outer.shape[:-2] + (-1,)) + + # [*, N_res, N_res, C_z] + outer = self.linear_out(outer) + + return outer + + @torch.jit.ignore + def _chunk(self, + a: torch.Tensor, + b: torch.Tensor, + chunk_size: int + ) -> torch.Tensor: + # Since the "batch dim" in this case is not a true batch dimension + # (in that the shape of the output depends on it), we need to + # iterate over it ourselves + a_reshape = a.reshape((-1,) + a.shape[-3:]) + b_reshape = b.reshape((-1,) + b.shape[-3:]) + out = [] + for a_prime, b_prime in zip(a_reshape, b_reshape): + outer = chunk_layer( + partial(self._opm, b=b_prime), + {"a": a_prime}, + chunk_size=chunk_size, + no_batch_dims=1, + ) + out.append(outer) + outer = torch.stack(out, dim=0) + outer = outer.reshape(a.shape[:-3] + outer.shape[1:]) + + return outer + + def forward(self, + m: torch.Tensor, + mask: Optional[torch.Tensor] = None, + chunk_size: Optional[int] = None + ) -> torch.Tensor: + """ + Args: + m: + [*, N_seq, N_res, C_m] MSA embedding + mask: + [*, N_seq, N_res] MSA mask + Returns: + [*, N_res, N_res, C_z] pair embedding update + """ + if mask is None: + mask = m.new_ones(m.shape[:-1]) + + # [*, N_seq, N_res, C_m] + m = self.layer_norm(m) + + # [*, N_seq, N_res, C] + mask = mask.unsqueeze(-1) + a = self.linear_1(m) * mask + b = self.linear_2(m) * mask + + a = 
class PairTransition(nn.Module):
    """
    Implements Algorithm 15.

    A layer-normed two-layer MLP applied to every entry of the pair
    embedding, with the result multiplied by the pair mask.
    """

    def __init__(self, c_z, n):
        """
        Args:
            c_z:
                Pair transition channel dimension
            n:
                Factor by which c_z is multiplied to obtain hidden channel
                dimension
        """
        super(PairTransition, self).__init__()

        self.c_z = c_z
        self.n = n

        hidden_dim = self.n * self.c_z
        self.layer_norm = LayerNorm(self.c_z)
        self.linear_1 = Linear(self.c_z, hidden_dim, init="relu")
        self.relu = nn.ReLU()
        self.linear_2 = Linear(hidden_dim, c_z, init="final")

    def _transition(self, z, mask):
        """Expand, apply ReLU, project back to C_z and apply the mask."""
        # [*, N_res, N_res, C_hidden]
        hidden = self.relu(self.linear_1(z))

        # [*, N_res, N_res, C_z]
        return self.linear_2(hidden) * mask

    @torch.jit.ignore
    def _chunk(self,
        z: torch.Tensor,
        mask: torch.Tensor,
        chunk_size: int,
    ) -> torch.Tensor:
        """
        Run ``_transition`` through ``chunk_layer``, treating every
        dimension of ``z`` except the last two as a batch dimension.
        """
        batch_dims = len(z.shape[:-2])
        return chunk_layer(
            self._transition,
            {"z": z, "mask": mask},
            chunk_size=chunk_size,
            no_batch_dims=batch_dims,
        )

    def forward(self,
        z: torch.Tensor,
        mask: Optional[torch.Tensor] = None,
        chunk_size: Optional[int] = None,
    ) -> torch.Tensor:
        """
        Args:
            z:
                [*, N_res, N_res, C_z] pair embedding
            mask:
                [*, N_res, N_res] pair mask. Defaults to all ones
            chunk_size:
                If given, the transition is evaluated in chunks of this
                size over the batch dimensions
        Returns:
            [*, N_res, N_res, C_z] pair embedding update
        """
        # DISCREPANCY: DeepMind forgets to apply the mask in this module.
        if mask is None:
            mask = z.new_ones(z.shape[:-1])

        # [*, N_res, N_res, 1]
        mask = mask.unsqueeze(-1)

        # [*, N_res, N_res, C_z]
        z = self.layer_norm(z)

        if chunk_size is None:
            return self._transition(z=z, mask=mask)

        return self._chunk(z, mask, chunk_size)
def _calculate_fan(linear_weight_shape, fan="fan_in"):
    """
    Pick the fan value of a 2-D linear weight.

    Args:
        linear_weight_shape:
            ``(fan_out, fan_in)`` shape of the weight
        fan:
            One of "fan_in", "fan_out" or "fan_avg" (the mean of the two)
    Returns:
        The selected fan value
    Raises:
        ValueError: if ``fan`` is not one of the supported options
    """
    n_out, n_in = linear_weight_shape

    if fan == "fan_in":
        return n_in
    if fan == "fan_out":
        return n_out
    if fan == "fan_avg":
        return (n_in + n_out) / 2

    raise ValueError("Invalid fan option")
def ipa_point_weights_init_(weights):
    """
    Fill ``weights`` in place with the inverse softplus of 1, i.e. the value
    whose softplus is 1. The fill is performed without recording gradients.
    """
    softplus_inverse_1 = 0.541324854612918
    with torch.no_grad():
        weights.fill_(softplus_inverse_1)
class LayerNorm(nn.Module):
    """
    Layer normalisation over the final dimension with a learned scale
    (initialised to ones) and shift (initialised to zeros).
    """

    def __init__(self, c_in, eps=1e-5):
        """
        Args:
            c_in:
                Size of the final (normalised) dimension
            eps:
                Value added to the variance for numerical stability
        """
        super().__init__()

        # Stored as a tuple because that is the form layer_norm expects
        # for its normalized_shape argument.
        self.c_in = (c_in,)
        self.eps = eps

        self.weight = nn.Parameter(torch.ones(c_in))
        self.bias = nn.Parameter(torch.zeros(c_in))

    def forward(self, x):
        """Normalise ``x`` over its last dimension."""
        in_dtype = x.dtype
        needs_manual_cast = (
            in_dtype is torch.bfloat16
            and not deepspeed.utils.is_initialized()
        )

        if not needs_manual_cast:
            return nn.functional.layer_norm(
                x,
                self.c_in,
                self.weight,
                self.bias,
                self.eps,
            )

        # bfloat16 input without DeepSpeed: turn autocast off and cast the
        # parameters to the input dtype explicitly.
        with torch.cuda.amp.autocast(enabled=False):
            return nn.functional.layer_norm(
                x,
                self.c_in,
                self.weight.to(dtype=in_dtype),
                self.bias.to(dtype=in_dtype),
                self.eps
            )
@torch.jit.ignore
def _attention_chunked_trainable(
    query, key, value, biases, chunk_size, chunk_dim, checkpoint,
):
    """
    Attention evaluated in slices of ``chunk_size`` along ``chunk_dim``,
    optionally checkpointing each slice.

    Args:
        query, key, value:
            Tensors accepted by ``_attention``; each is sliced along
            ``chunk_dim``
        biases:
            List of bias tensors with the same number of dimensions as
            ``query``. A bias whose size along ``chunk_dim`` is 1 is left
            whole; any other bias is sliced together with the query. None
            entries are ignored
        chunk_size:
            Number of indices of ``chunk_dim`` processed per slice
        chunk_dim:
            Dimension along which to chunk
        checkpoint:
            Whether each slice is run through the checkpoint function
    Returns:
        The per-slice outputs concatenated along ``chunk_dim``
    Raises:
        ValueError: if ``checkpoint`` is set and more than two biases are
            given
    """
    # Callers may pass None in place of an absent bias. Drop those up
    # front so neither branch below ever reads the shape of None.
    biases = [b for b in biases if b is not None]

    if checkpoint and len(biases) > 2:
        raise ValueError(
            "Checkpointed version permits only two bias terms"
        )

    def _checkpointable_attention(q, k, v, b1, b2):
        bs = [b for b in (b1, b2) if b is not None]
        return _attention(q, k, v, bs)

    no_dims = len(query.shape)

    def _chunk_index(start, end):
        index = [slice(None)] * no_dims
        index[chunk_dim] = slice(start, end)
        return tuple(index)

    def _slice_bias(b, start, end):
        # A size-1 axis is a broadcast axis and must be kept whole.
        if b.shape[chunk_dim] == 1:
            return b[(slice(None),) * no_dims]
        return b[_chunk_index(start, end)]

    o_chunks = []
    checkpoint_fn = get_checkpoint_fn()
    count = query.shape[chunk_dim]
    for start in range(0, count, chunk_size):
        end = start + chunk_size
        idx_tup = _chunk_index(start, end)
        q_chunk = query[idx_tup]
        k_chunk = key[idx_tup]
        v_chunk = value[idx_tup]

        bias_chunks = [_slice_bias(b, start, end) for b in biases]

        if checkpoint:
            # The checkpointed callable takes exactly two bias slots.
            bias_1_chunk, bias_2_chunk = (bias_chunks + [None, None])[:2]

            o_chunk = checkpoint_fn(_checkpointable_attention,
                q_chunk, k_chunk, v_chunk, bias_1_chunk, bias_2_chunk
            )
        else:
            o_chunk = _attention(q_chunk, k_chunk, v_chunk, bias_chunks)

        o_chunks.append(o_chunk)

    o = torch.cat(o_chunks, dim=chunk_dim)
    return o
+ """ + def __init__( + self, + c_q: int, + c_k: int, + c_v: int, + c_hidden: int, + no_heads: int, + gating: bool = True, + ): + """ + Args: + c_q: + Input dimension of query data + c_k: + Input dimension of key data + c_v: + Input dimension of value data + c_hidden: + Per-head hidden dimension + no_heads: + Number of attention heads + gating: + Whether the output should be gated using query data + """ + super(Attention, self).__init__() + + self.c_q = c_q + self.c_k = c_k + self.c_v = c_v + self.c_hidden = c_hidden + self.no_heads = no_heads + self.gating = gating + + # DISCREPANCY: c_hidden is not the per-head channel dimension, as + # stated in the supplement, but the overall channel dimension. + + self.linear_q = Linear( + self.c_q, self.c_hidden * self.no_heads, bias=False, init="glorot" + ) + self.linear_k = Linear( + self.c_k, self.c_hidden * self.no_heads, bias=False, init="glorot" + ) + self.linear_v = Linear( + self.c_v, self.c_hidden * self.no_heads, bias=False, init="glorot" + ) + self.linear_o = Linear( + self.c_hidden * self.no_heads, self.c_q, init="final" + ) + + self.linear_g = None + if self.gating: + self.linear_g = Linear( + self.c_q, self.c_hidden * self.no_heads, init="gating" + ) + + self.sigmoid = nn.Sigmoid() + + def _prep_qkv(self, + q_x: torch.Tensor, + kv_x: torch.Tensor + ) -> Tuple[ + torch.Tensor, torch.Tensor, torch.Tensor + ]: + # [*, Q/K/V, H * C_hidden] + q = self.linear_q(q_x) + k = self.linear_k(kv_x) + v = self.linear_v(kv_x) + + # [*, Q/K, H, C_hidden] + q = q.view(q.shape[:-1] + (self.no_heads, -1)) + k = k.view(k.shape[:-1] + (self.no_heads, -1)) + v = v.view(v.shape[:-1] + (self.no_heads, -1)) + + q /= math.sqrt(self.c_hidden) + + return q, k, v + + def _wrap_up(self, + o: torch.Tensor, + q_x: torch.Tensor + ) -> torch.Tensor: + if(self.linear_g is not None): + g = self.sigmoid(self.linear_g(q_x)) + + # [*, Q, H, C_hidden] + g = g.view(g.shape[:-1] + (self.no_heads, -1)) + o = o * g + + # [*, Q, H * C_hidden] + o = 
flatten_final_dims(o, 2) + + # [*, Q, C_q] + o = self.linear_o(o) + + return o + + def forward( + self, + q_x: torch.Tensor, + kv_x: torch.Tensor, + biases: Optional[List[torch.Tensor]] = None, + use_lma: bool = False, + q_chunk_size: Optional[int] = None, + kv_chunk_size: Optional[int] = None, + ) -> torch.Tensor: + """ + Args: + q_x: + [*, Q, C_q] query data + kv_x: + [*, K, C_k] key data + biases: + List of biases that broadcast to [*, H, Q, K] + use_lma: + Whether to use low-memory attention + q_chunk_size: + Query chunk size (for LMA) + kv_chunk_size: + Key/Value chunk size (for LMA) + Returns + [*, Q, C_q] attention update + """ + if(biases is None): + biases = [] + if(use_lma and (q_chunk_size is None or kv_chunk_size is None)): + raise ValueError( + "If use_lma is specified, q_chunk_size and kv_chunk_size must " + "be provided" + ) + + q, k, v = self._prep_qkv(q_x, kv_x) + + if(use_lma): + biases = [ + b.expand(b.shape[:-2] + (q_x.shape[-2],) + (kv_x.shape[-2],)) + for b in biases + ] + + o = _lma(q, k, v, biases, q_chunk_size, kv_chunk_size) + else: + o = _attention(q, k, v, biases) + + o = self._wrap_up(o, q_x) + + return o + + +class GlobalAttention(nn.Module): + def __init__(self, c_in, c_hidden, no_heads, inf, eps): + super(GlobalAttention, self).__init__() + + self.c_in = c_in + self.c_hidden = c_hidden + self.no_heads = no_heads + self.inf = inf + self.eps = eps + + self.linear_q = Linear( + c_in, c_hidden * no_heads, bias=False, init="glorot" + ) + + self.linear_k = Linear( + c_in, c_hidden, bias=False, init="glorot", + ) + self.linear_v = Linear( + c_in, c_hidden, bias=False, init="glorot", + ) + self.linear_g = Linear(c_in, c_hidden * no_heads, init="gating") + self.linear_o = Linear(c_hidden * no_heads, c_in, init="final") + + self.sigmoid = nn.Sigmoid() + + def forward(self, m: torch.Tensor, mask: torch.Tensor) -> torch.Tensor: + # [*, N_res, C_in] + q = torch.sum(m * mask.unsqueeze(-1), dim=-2) / ( + torch.sum(mask, dim=-1)[..., None] + 
def _lma(
    q: torch.Tensor,
    k: torch.Tensor,
    v: torch.Tensor,
    biases: List[torch.Tensor],
    q_chunk_size: int,
    kv_chunk_size: int,
) -> torch.Tensor:
    """
    Low-memory attention: softmax attention evaluated over query chunks and
    key/value chunks so that the full [Q, K] logit matrix is never held at
    once. Per-chunk partial sums are rescaled by their running maxima
    before being combined, which keeps the exponentials bounded.

    Args:
        q:
            [*, Q, H, C_hidden] queries (used as given; no scaling is
            applied here)
        k:
            [*, K, H, C_hidden] keys
        v:
            [*, K, H, C_hidden] values
        biases:
            Biases added to the [*, H, Q, K] logits. Their last two
            dimensions may either have the full Q / K extent or be of
            size 1, in which case they are broadcast
        q_chunk_size:
            Number of queries processed at a time
        kv_chunk_size:
            Number of keys/values processed at a time
    Returns:
        Tensor with the shape of ``q`` holding the attention output
    """
    def _window(b, dim, start, stop):
        # A size-1 axis is a broadcast axis. Slicing it past index 0 would
        # leave an empty tensor, so it is passed through whole instead.
        if b.shape[dim] == 1:
            return b
        index = [slice(None)] * b.dim()
        index[dim] = slice(start, stop)
        return b[tuple(index)]

    no_q, no_kv = q.shape[-3], k.shape[-3]

    # [*, Q, H, C_hidden]
    o = q.new_zeros(q.shape)
    for q_s in range(0, no_q, q_chunk_size):
        q_e = q_s + q_chunk_size
        q_chunk = q[..., q_s:q_e, :, :]
        q_biases = [_window(b, -2, q_s, q_e) for b in biases]

        maxes = []
        weights = []
        values = []
        for kv_s in range(0, no_kv, kv_chunk_size):
            kv_e = kv_s + kv_chunk_size
            k_chunk = k[..., kv_s:kv_e, :, :]
            v_chunk = v[..., kv_s:kv_e, :, :]

            # [*, H, q, k]
            a = torch.einsum(
                "...qhd,...khd->...hqk", q_chunk, k_chunk,
            )

            for b in q_biases:
                a += _window(b, -1, kv_s, kv_e)

            # [*, q, H, k]
            a = a.transpose(-2, -3)

            # Subtract the chunk-local maximum before exponentiating.
            max_a = torch.max(a, dim=-1, keepdim=True)[0]
            exp_a = torch.exp(a - max_a)
            exp_v = torch.einsum("...vhf,...qhv->...qhf", v_chunk, exp_a)

            maxes.append(max_a.detach().squeeze(-1))
            weights.append(torch.sum(exp_a, dim=-1))
            values.append(exp_v)

        # Stack the per-kv-chunk statistics on a new leading chunk axis.
        chunk_max = torch.stack(maxes, dim=-3)
        chunk_weights = torch.stack(weights, dim=-3)
        chunk_values = torch.stack(values, dim=-4)

        # Bring every chunk onto the scale of the global maximum.
        global_max = torch.max(chunk_max, dim=-3, keepdim=True)[0]
        max_diffs = torch.exp(chunk_max - global_max)
        chunk_values *= max_diffs.unsqueeze(-1)
        chunk_weights *= max_diffs

        all_values = torch.sum(chunk_values, dim=-4)
        all_weights = torch.sum(chunk_weights.unsqueeze(-1), dim=-4)

        q_chunk_out = all_values / all_weights

        o[..., q_s:q_e, :, :] = q_chunk_out

    return o
class AngleResnetBlock(nn.Module):
    """
    Residual block: ReLU -> linear -> ReLU -> linear, with the block input
    added back onto the result.
    """

    def __init__(self, c_hidden):
        """
        Args:
            c_hidden:
                Hidden channel dimension
        """
        super().__init__()

        self.c_hidden = c_hidden

        self.linear_1 = Linear(self.c_hidden, self.c_hidden, init="relu")
        self.linear_2 = Linear(self.c_hidden, self.c_hidden, init="final")

        self.relu = nn.ReLU()

    def forward(self, a: torch.Tensor) -> torch.Tensor:
        """Return ``a`` plus the two-layer update computed from it."""
        residual = a

        update = self.linear_1(self.relu(a))
        update = self.linear_2(self.relu(update))

        return update + residual
Linear(self.c_in, self.c_hidden) + self.linear_initial = Linear(self.c_in, self.c_hidden) + + self.layers = nn.ModuleList() + for _ in range(self.no_blocks): + layer = AngleResnetBlock(c_hidden=self.c_hidden) + self.layers.append(layer) + + self.linear_out = Linear(self.c_hidden, self.no_angles * 2) + + self.relu = nn.ReLU() + + def forward( + self, s: torch.Tensor, s_initial: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Args: + s: + [*, C_hidden] single embedding + s_initial: + [*, C_hidden] single embedding as of the start of the + StructureModule + Returns: + [*, no_angles, 2] predicted angles + """ + # NOTE: The ReLU's applied to the inputs are absent from the supplement + # pseudocode but present in the source. For maximal compatibility with + # the pretrained weights, I'm going with the source. + + # [*, C_hidden] + s_initial = self.relu(s_initial) + s_initial = self.linear_initial(s_initial) + s = self.relu(s) + s = self.linear_in(s) + s = s + s_initial + + for l in self.layers: + s = l(s) + + s = self.relu(s) + + # [*, no_angles * 2] + s = self.linear_out(s) + + # [*, no_angles, 2] + s = s.view(s.shape[:-1] + (-1, 2)) + + unnormalized_s = s + norm_denom = torch.sqrt( + torch.clamp( + torch.sum(s ** 2, dim=-1, keepdim=True), + min=self.eps, + ) + ) + s = s / norm_denom + + return unnormalized_s, s + + +class InvariantPointAttention(nn.Module): + """ + Implements Algorithm 22. 
+ """ + def __init__( + self, + c_s: int, + c_z: int, + c_hidden: int, + no_heads: int, + no_qk_points: int, + no_v_points: int, + inf: float = 1e5, + eps: float = 1e-8, + ): + """ + Args: + c_s: + Single representation channel dimension + c_z: + Pair representation channel dimension + c_hidden: + Hidden channel dimension + no_heads: + Number of attention heads + no_qk_points: + Number of query/key points to generate + no_v_points: + Number of value points to generate + """ + super(InvariantPointAttention, self).__init__() + + self.c_s = c_s + self.c_z = c_z + self.c_hidden = c_hidden + self.no_heads = no_heads + self.no_qk_points = no_qk_points + self.no_v_points = no_v_points + self.inf = inf + self.eps = eps + + # These linear layers differ from their specifications in the + # supplement. There, they lack bias and use Glorot initialization. + # Here as in the official source, they have bias and use the default + # Lecun initialization. + hc = self.c_hidden * self.no_heads + self.linear_q = Linear(self.c_s, hc) + self.linear_kv = Linear(self.c_s, 2 * hc) + + hpq = self.no_heads * self.no_qk_points * 3 + self.linear_q_points = Linear(self.c_s, hpq) + + hpkv = self.no_heads * (self.no_qk_points + self.no_v_points) * 3 + self.linear_kv_points = Linear(self.c_s, hpkv) + + hpv = self.no_heads * self.no_v_points * 3 + + self.linear_b = Linear(self.c_z, self.no_heads) + + self.head_weights = nn.Parameter(torch.zeros((no_heads))) + ipa_point_weights_init_(self.head_weights) + + concat_out_dim = self.no_heads * ( + self.c_z + self.c_hidden + self.no_v_points * 4 + ) + self.linear_out = Linear(concat_out_dim, self.c_s, init="final") + + self.softmax = nn.Softmax(dim=-1) + self.softplus = nn.Softplus() + + def forward( + self, + s: torch.Tensor, + z: Optional[torch.Tensor], + r: Rigid, + mask: torch.Tensor, + inplace_safe: bool = False, + _offload_inference: bool = False, + _z_reference_list: Optional[Sequence[torch.Tensor]] = None, + ) -> torch.Tensor: + """ + Args: + s: + 
[*, N_res, C_s] single representation + z: + [*, N_res, N_res, C_z] pair representation + r: + [*, N_res] transformation object + mask: + [*, N_res] mask + Returns: + [*, N_res, C_s] single representation update + """ + if(_offload_inference and inplace_safe): + z = _z_reference_list + else: + z = [z] + + ####################################### + # Generate scalar and point activations + ####################################### + # [*, N_res, H * C_hidden] + q = self.linear_q(s) + kv = self.linear_kv(s) + + # [*, N_res, H, C_hidden] + q = q.view(q.shape[:-1] + (self.no_heads, -1)) + + # [*, N_res, H, 2 * C_hidden] + kv = kv.view(kv.shape[:-1] + (self.no_heads, -1)) + + # [*, N_res, H, C_hidden] + k, v = torch.split(kv, self.c_hidden, dim=-1) + + # [*, N_res, H * P_q * 3] + q_pts = self.linear_q_points(s) + + # This is kind of clunky, but it's how the original does it + # [*, N_res, H * P_q, 3] + q_pts = torch.split(q_pts, q_pts.shape[-1] // 3, dim=-1) + q_pts = torch.stack(q_pts, dim=-1) + q_pts = r[..., None].apply(q_pts) + + # [*, N_res, H, P_q, 3] + q_pts = q_pts.view( + q_pts.shape[:-2] + (self.no_heads, self.no_qk_points, 3) + ) + + # [*, N_res, H * (P_q + P_v) * 3] + kv_pts = self.linear_kv_points(s) + + # [*, N_res, H * (P_q + P_v), 3] + kv_pts = torch.split(kv_pts, kv_pts.shape[-1] // 3, dim=-1) + kv_pts = torch.stack(kv_pts, dim=-1) + kv_pts = r[..., None].apply(kv_pts) + + # [*, N_res, H, (P_q + P_v), 3] + kv_pts = kv_pts.view(kv_pts.shape[:-2] + (self.no_heads, -1, 3)) + + # [*, N_res, H, P_q/P_v, 3] + k_pts, v_pts = torch.split( + kv_pts, [self.no_qk_points, self.no_v_points], dim=-2 + ) + + ########################## + # Compute attention scores + ########################## + # [*, N_res, N_res, H] + b = self.linear_b(z[0]) + + if(_offload_inference): + assert(sys.getrefcount(z[0]) == 2) + z[0] = z[0].cpu() + + # [*, H, N_res, N_res] + if(is_fp16_enabled()): + with torch.cuda.amp.autocast(enabled=False): + a = torch.matmul( + 
permute_final_dims(q.float(), (1, 0, 2)), # [*, H, N_res, C_hidden] + permute_final_dims(k.float(), (1, 2, 0)), # [*, H, C_hidden, N_res] + ) + else: + a = torch.matmul( + permute_final_dims(q, (1, 0, 2)), # [*, H, N_res, C_hidden] + permute_final_dims(k, (1, 2, 0)), # [*, H, C_hidden, N_res] + ) + + a *= math.sqrt(1.0 / (3 * self.c_hidden)) + a += (math.sqrt(1.0 / 3) * permute_final_dims(b, (2, 0, 1))) + + # [*, N_res, N_res, H, P_q, 3] + pt_att = q_pts.unsqueeze(-4) - k_pts.unsqueeze(-5) + if(inplace_safe): + pt_att *= pt_att + else: + pt_att = pt_att ** 2 + + # [*, N_res, N_res, H, P_q] + pt_att = sum(torch.unbind(pt_att, dim=-1)) + head_weights = self.softplus(self.head_weights).view( + *((1,) * len(pt_att.shape[:-2]) + (-1, 1)) + ) + head_weights = head_weights * math.sqrt( + 1.0 / (3 * (self.no_qk_points * 9.0 / 2)) + ) + if(inplace_safe): + pt_att *= head_weights + else: + pt_att = pt_att * head_weights + + # [*, N_res, N_res, H] + pt_att = torch.sum(pt_att, dim=-1) * (-0.5) + # [*, N_res, N_res] + square_mask = mask.unsqueeze(-1) * mask.unsqueeze(-2) + square_mask = self.inf * (square_mask - 1) + + # [*, H, N_res, N_res] + pt_att = permute_final_dims(pt_att, (2, 0, 1)) + + if(inplace_safe): + a += pt_att + del pt_att + a += square_mask.unsqueeze(-3) + # in-place softmax + attn_core_inplace_cuda.forward_( + a, + reduce(mul, a.shape[:-1]), + a.shape[-1], + ) + else: + a = a + pt_att + a = a + square_mask.unsqueeze(-3) + a = self.softmax(a) + + ################ + # Compute output + ################ + # [*, N_res, H, C_hidden] + o = torch.matmul( + a, v.transpose(-2, -3).to(dtype=a.dtype) + ).transpose(-2, -3) + + # [*, N_res, H * C_hidden] + o = flatten_final_dims(o, 2) + + # [*, H, 3, N_res, P_v] + if(inplace_safe): + v_pts = permute_final_dims(v_pts, (1, 3, 0, 2)) + o_pt = [ + torch.matmul(a, v.to(a.dtype)) + for v in torch.unbind(v_pts, dim=-3) + ] + o_pt = torch.stack(o_pt, dim=-3) + else: + o_pt = torch.sum( + ( + a[..., None, :, :, None] + * 
permute_final_dims(v_pts, (1, 3, 0, 2))[..., None, :, :] + ), + dim=-2, + ) + + # [*, N_res, H, P_v, 3] + o_pt = permute_final_dims(o_pt, (2, 0, 3, 1)) + o_pt = r[..., None, None].invert_apply(o_pt) + + # [*, N_res, H * P_v] + o_pt_norm = flatten_final_dims( + torch.sqrt(torch.sum(o_pt ** 2, dim=-1) + self.eps), 2 + ) + + # [*, N_res, H * P_v, 3] + o_pt = o_pt.reshape(*o_pt.shape[:-3], -1, 3) + + if(_offload_inference): + z[0] = z[0].to(o_pt.device) + + # [*, N_res, H, C_z] + o_pair = torch.matmul(a.transpose(-2, -3), z[0].to(dtype=a.dtype)) + + # [*, N_res, H * C_z] + o_pair = flatten_final_dims(o_pair, 2) + + # [*, N_res, C_s] + s = self.linear_out( + torch.cat( + (o, *torch.unbind(o_pt, dim=-1), o_pt_norm, o_pair), dim=-1 + ).to(dtype=z[0].dtype) + ) + + return s + + +class BackboneUpdate(nn.Module): + """ + Implements part of Algorithm 23. + """ + + def __init__(self, c_s): + """ + Args: + c_s: + Single representation channel dimension + """ + super(BackboneUpdate, self).__init__() + + self.c_s = c_s + + self.linear = Linear(self.c_s, 6, init="final") + + def forward(self, s: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Args: + [*, N_res, C_s] single representation + Returns: + [*, N_res, 6] update vector + """ + # [*, 6] + update = self.linear(s) + + return update + + +class StructureModuleTransitionLayer(nn.Module): + def __init__(self, c): + super(StructureModuleTransitionLayer, self).__init__() + + self.c = c + + self.linear_1 = Linear(self.c, self.c, init="relu") + self.linear_2 = Linear(self.c, self.c, init="relu") + self.linear_3 = Linear(self.c, self.c, init="final") + + self.relu = nn.ReLU() + + def forward(self, s): + s_initial = s + s = self.linear_1(s) + s = self.relu(s) + s = self.linear_2(s) + s = self.relu(s) + s = self.linear_3(s) + + s = s + s_initial + + return s + + +class StructureModuleTransition(nn.Module): + def __init__(self, c, num_layers, dropout_rate): + super(StructureModuleTransition, self).__init__() + + self.c = c + 
self.num_layers = num_layers + self.dropout_rate = dropout_rate + + self.layers = nn.ModuleList() + for _ in range(self.num_layers): + l = StructureModuleTransitionLayer(self.c) + self.layers.append(l) + + self.dropout = nn.Dropout(self.dropout_rate) + self.layer_norm = LayerNorm(self.c) + + def forward(self, s): + for l in self.layers: + s = l(s) + + s = self.dropout(s) + s = self.layer_norm(s) + + return s + + +class StructureModule(nn.Module): + def __init__( + self, + c_s, + c_z, + c_ipa, + c_resnet, + no_heads_ipa, + no_qk_points, + no_v_points, + dropout_rate, + no_blocks, + no_transition_layers, + no_resnet_blocks, + no_angles, + trans_scale_factor, + epsilon, + inf, + **kwargs, + ): + """ + Args: + c_s: + Single representation channel dimension + c_z: + Pair representation channel dimension + c_ipa: + IPA hidden channel dimension + c_resnet: + Angle resnet (Alg. 23 lines 11-14) hidden channel dimension + no_heads_ipa: + Number of IPA heads + no_qk_points: + Number of query/key points to generate during IPA + no_v_points: + Number of value points to generate during IPA + dropout_rate: + Dropout rate used throughout the layer + no_blocks: + Number of structure module blocks + no_transition_layers: + Number of layers in the single representation transition + (Alg. 
23 lines 8-9) + no_resnet_blocks: + Number of blocks in the angle resnet + no_angles: + Number of angles to generate in the angle resnet + trans_scale_factor: + Scale of single representation transition hidden dimension + epsilon: + Small number used in angle resnet normalization + inf: + Large number used for attention masking + """ + super(StructureModule, self).__init__() + + self.c_s = c_s + self.c_z = c_z + self.c_ipa = c_ipa + self.c_resnet = c_resnet + self.no_heads_ipa = no_heads_ipa + self.no_qk_points = no_qk_points + self.no_v_points = no_v_points + self.dropout_rate = dropout_rate + self.no_blocks = no_blocks + self.no_transition_layers = no_transition_layers + self.no_resnet_blocks = no_resnet_blocks + self.no_angles = no_angles + self.trans_scale_factor = trans_scale_factor + self.epsilon = epsilon + self.inf = inf + + # Buffers to be lazily initialized later + # self.default_frames + # self.group_idx + # self.atom_mask + # self.lit_positions + + self.layer_norm_s = LayerNorm(self.c_s) + self.layer_norm_z = LayerNorm(self.c_z) + + self.linear_in = Linear(self.c_s, self.c_s) + + self.ipa = InvariantPointAttention( + self.c_s, + self.c_z, + self.c_ipa, + self.no_heads_ipa, + self.no_qk_points, + self.no_v_points, + inf=self.inf, + eps=self.epsilon, + ) + + self.ipa_dropout = nn.Dropout(self.dropout_rate) + self.layer_norm_ipa = LayerNorm(self.c_s) + + self.transition = StructureModuleTransition( + self.c_s, + self.no_transition_layers, + self.dropout_rate, + ) + + self.bb_update = BackboneUpdate(self.c_s) + + self.angle_resnet = AngleResnet( + self.c_s, + self.c_resnet, + self.no_resnet_blocks, + self.no_angles, + self.epsilon, + ) + + def forward( + self, + evoformer_output_dict, + aatype, + mask=None, + inplace_safe=False, + _offload_inference=False, + ): + """ + Args: + evoformer_output_dict: + Dictionary containing: + "single": + [*, N_res, C_s] single representation + "pair": + [*, N_res, N_res, C_z] pair representation + aatype: + [*, N_res] amino 
acid indices + mask: + Optional [*, N_res] sequence mask + Returns: + A dictionary of outputs + """ + s = evoformer_output_dict["single"] + + if mask is None: + # [*, N] + mask = s.new_ones(s.shape[:-1]) + + # [*, N, C_s] + s = self.layer_norm_s(s) + + # [*, N, N, C_z] + z = self.layer_norm_z(evoformer_output_dict["pair"]) + + z_reference_list = None + if(_offload_inference): + assert(sys.getrefcount(evoformer_output_dict["pair"]) == 2) + evoformer_output_dict["pair"] = evoformer_output_dict["pair"].cpu() + z_reference_list = [z] + z = None + + # [*, N, C_s] + s_initial = s + s = self.linear_in(s) + + # [*, N] + rigids = Rigid.identity( + s.shape[:-1], + s.dtype, + s.device, + self.training, + fmt="quat", + ) + outputs = [] + for i in range(self.no_blocks): + # [*, N, C_s] + s = s + self.ipa( + s, + z, + rigids, + mask, + inplace_safe=inplace_safe, + _offload_inference=_offload_inference, + _z_reference_list=z_reference_list + ) + s = self.ipa_dropout(s) + s = self.layer_norm_ipa(s) + s = self.transition(s) + + # [*, N] + rigids = rigids.compose_q_update_vec(self.bb_update(s)) + + # To hew as closely as possible to AlphaFold, we convert our + # quaternion-based transformations to rotation-matrix ones + # here + backb_to_global = Rigid( + Rotation( + rot_mats=rigids.get_rots().get_rot_mats(), + quats=None + ), + rigids.get_trans(), + ) + + backb_to_global = backb_to_global.scale_translation( + self.trans_scale_factor + ) + + # [*, N, 7, 2] + unnormalized_angles, angles = self.angle_resnet(s, s_initial) + + all_frames_to_global = self.torsion_angles_to_frames( + backb_to_global, + angles, + aatype, + ) + + pred_xyz = self.frames_and_literature_positions_to_atom14_pos( + all_frames_to_global, + aatype, + ) + + scaled_rigids = rigids.scale_translation(self.trans_scale_factor) + + preds = { + "frames": scaled_rigids.to_tensor_7(), + "sidechain_frames": all_frames_to_global.to_tensor_4x4(), + "unnormalized_angles": unnormalized_angles, + "angles": angles, + "positions": 
pred_xyz, + "states": s, + } + + outputs.append(preds) + + rigids = rigids.stop_rot_gradient() + + del z, z_reference_list + + if(_offload_inference): + evoformer_output_dict["pair"] = ( + evoformer_output_dict["pair"].to(s.device) + ) + + outputs = dict_multimap(torch.stack, outputs) + outputs["single"] = s + + return outputs + + def _init_residue_constants(self, float_dtype, device): + if not hasattr(self, "default_frames"): + self.register_buffer( + "default_frames", + torch.tensor( + restype_rigid_group_default_frame, + dtype=float_dtype, + device=device, + requires_grad=False, + ), + persistent=False, + ) + if not hasattr(self, "group_idx"): + self.register_buffer( + "group_idx", + torch.tensor( + restype_atom14_to_rigid_group, + device=device, + requires_grad=False, + ), + persistent=False, + ) + if not hasattr(self, "atom_mask"): + self.register_buffer( + "atom_mask", + torch.tensor( + restype_atom14_mask, + dtype=float_dtype, + device=device, + requires_grad=False, + ), + persistent=False, + ) + if not hasattr(self, "lit_positions"): + self.register_buffer( + "lit_positions", + torch.tensor( + restype_atom14_rigid_group_positions, + dtype=float_dtype, + device=device, + requires_grad=False, + ), + persistent=False, + ) + + def torsion_angles_to_frames(self, r, alpha, f): + # Lazily initialize the residue constants on the correct device + self._init_residue_constants(alpha.dtype, alpha.device) + # Separated purely to make testing less annoying + return torsion_angles_to_frames(r, alpha, f, self.default_frames) + + def frames_and_literature_positions_to_atom14_pos( + self, r, f # [*, N, 8] # [*, N] + ): + # Lazily initialize the residue constants on the correct device + self._init_residue_constants(r.get_rots().dtype, r.get_rots().device) + return frames_and_literature_positions_to_atom14_pos( + r, + f, + self.default_frames, + self.group_idx, + self.atom_mask, + self.lit_positions, + ) diff --git a/openfold/model/template.py b/openfold/model/template.py new 
class TemplatePointwiseAttention(nn.Module):
    """
    Implements Algorithm 17.

    The pair embedding is fed to ``self.mha`` as the query input and the
    template embedding as the key/value input, so that every pair position
    combines information across the template dimension.
    """
    def __init__(self, c_t, c_z, c_hidden, no_heads, inf, **kwargs):
        """
        Args:
            c_t:
                Template embedding channel dimension
            c_z:
                Pair embedding channel dimension
            c_hidden:
                Hidden channel dimension
            no_heads:
                Number of attention heads
            inf:
                Large number used to build the additive template mask bias
            **kwargs:
                Accepted and ignored
        """
        super(TemplatePointwiseAttention, self).__init__()

        self.c_t = c_t
        self.c_z = c_z
        self.c_hidden = c_hidden
        self.no_heads = no_heads
        self.inf = inf

        self.mha = Attention(
            self.c_z,
            self.c_t,
            self.c_t,
            self.c_hidden,
            self.no_heads,
            gating=False,
        )

    def _chunk(
        self,
        z: torch.Tensor,
        t: torch.Tensor,
        biases: List[torch.Tensor],
        chunk_size: int,
    ) -> torch.Tensor:
        """
        Run ``self.mha`` through ``chunk_layer`` instead of calling it
        directly.

        Args:
            z:
                Query input (the unsqueezed pair embedding)
            t:
                Key/value input (the permuted template embedding)
            biases:
                Additive attention biases
            chunk_size:
                Chunk size handed to ``chunk_layer``
        Returns:
            The output of ``chunk_layer``
        """
        mha_inputs = {
            "q_x": z,
            "kv_x": t,
            "biases": biases,
        }
        return chunk_layer(
            self.mha,
            mha_inputs,
            chunk_size=chunk_size,
            # Everything except the final two dimensions is a batch dim
            no_batch_dims=len(z.shape[:-2]),
        )

    def forward(
        self,
        t: torch.Tensor,
        z: torch.Tensor,
        template_mask: Optional[torch.Tensor] = None,
        chunk_size: Optional[int] = None,
    ) -> torch.Tensor:
        """
        Args:
            t:
                [*, N_templ, N_res, N_res, C_t] template embedding
            z:
                [*, N_res, N_res, C_z] pair embedding
            template_mask:
                [*, N_templ] template mask. Defaults to all ones
            chunk_size:
                If not None, the attention is evaluated in chunks of this
                size via ``_chunk``
        Returns:
            [*, N_res, N_res, C_z] pair embedding update
        """
        if template_mask is None:
            # [*, N_templ]
            template_mask = t.new_ones(t.shape[:-3])

        # [*, 1, 1, 1, 1, N_templ]: 0 for kept templates, -inf for masked
        bias = self.inf * (template_mask[..., None, None, None, None, :] - 1)

        # [*, N_res, N_res, 1, C_z]
        z = z.unsqueeze(-2)

        # [*, N_res, N_res, N_temp, C_t]
        t = permute_final_dims(t, (1, 2, 0, 3))

        biases = [bias]
        if chunk_size is not None:
            z = self._chunk(z, t, biases, chunk_size)
        else:
            z = self.mha(q_x=z, kv_x=t, biases=biases)

        # [*, N_res, N_res, C_z]
        z = z.squeeze(-2)

        return z


class TemplatePairStackBlock(nn.Module):
    """
    One block of the template pair stack. Applies triangular attention,
    triangular multiplication and a pair transition, each as a residual
    update, to every template independently.
    """
    def __init__(
        self,
        c_t: int,
        c_hidden_tri_att: int,
        c_hidden_tri_mul: int,
        no_heads: int,
        pair_transition_n: int,
        dropout_rate: float,
        inf: float,
        **kwargs,
    ):
        """
        Args:
            c_t:
                Template embedding channel dimension
            c_hidden_tri_att:
                Hidden dimension for triangular attention
            c_hidden_tri_mul:
                Hidden dimension for triangular multiplication
            no_heads:
                Number of triangular attention heads
            pair_transition_n:
                Scale factor passed to the pair transition
            dropout_rate:
                Dropout rate of the row-/column-wise dropout layers
            inf:
                Large number passed to the triangular attention layers
            **kwargs:
                Accepted and ignored
        """
        super(TemplatePairStackBlock, self).__init__()

        self.c_t = c_t
        self.c_hidden_tri_att = c_hidden_tri_att
        self.c_hidden_tri_mul = c_hidden_tri_mul
        self.no_heads = no_heads
        self.pair_transition_n = pair_transition_n
        self.dropout_rate = dropout_rate
        self.inf = inf

        self.dropout_row = DropoutRowwise(self.dropout_rate)
        self.dropout_col = DropoutColumnwise(self.dropout_rate)

        self.tri_att_start = TriangleAttentionStartingNode(
            self.c_t,
            self.c_hidden_tri_att,
            self.no_heads,
            inf=inf,
        )
        self.tri_att_end = TriangleAttentionEndingNode(
            self.c_t,
            self.c_hidden_tri_att,
            self.no_heads,
            inf=inf,
        )

        self.tri_mul_out = TriangleMultiplicationOutgoing(
            self.c_t,
            self.c_hidden_tri_mul,
        )
        self.tri_mul_in = TriangleMultiplicationIncoming(
            self.c_t,
            self.c_hidden_tri_mul,
        )

        self.pair_transition = PairTransition(
            self.c_t,
            self.pair_transition_n,
        )

    def forward(
        self,
        z: torch.Tensor,
        mask: torch.Tensor,
        chunk_size: Optional[int] = None,
        _mask_trans: bool = True,
    ):
        """
        Args:
            z:
                Template embedding; the template dimension is dim -4
            mask:
                Template pair mask; the template dimension is dim -3
            chunk_size:
                Chunk size forwarded to the attention and transition layers
            _mask_trans:
                If False, the pair transition is called with mask=None
        Returns:
            Updated template embedding with the same layout as ``z``
        """
        # Split into per-template slices that keep a singleton template
        # dimension, so that each template is processed on its own.
        single_templates = [
            t.unsqueeze(-4) for t in torch.unbind(z, dim=-4)
        ]
        single_templates_masks = [
            m.unsqueeze(-3) for m in torch.unbind(mask, dim=-3)
        ]

        updated_templates = []
        for i, single in enumerate(single_templates):
            single_mask = single_templates_masks[i]

            single = single + self.dropout_row(
                self.tri_att_start(
                    single,
                    chunk_size=chunk_size,
                    mask=single_mask,
                )
            )
            single = single + self.dropout_col(
                self.tri_att_end(
                    single,
                    chunk_size=chunk_size,
                    mask=single_mask,
                )
            )
            single = single + self.dropout_row(
                self.tri_mul_out(
                    single,
                    mask=single_mask,
                )
            )
            single = single + self.dropout_row(
                self.tri_mul_in(
                    single,
                    mask=single_mask,
                )
            )
            single = single + self.pair_transition(
                single,
                mask=single_mask if _mask_trans else None,
                chunk_size=chunk_size,
            )

            updated_templates.append(single)

        # Reassemble the template dimension
        z = torch.cat(updated_templates, dim=-4)

        return z


class TemplatePairStack(nn.Module):
    """
    Implements Algorithm 16.
    """
    def __init__(
        self,
        c_t,
        c_hidden_tri_att,
        c_hidden_tri_mul,
        no_blocks,
        no_heads,
        pair_transition_n,
        dropout_rate,
        blocks_per_ckpt,
        inf=1e9,
        **kwargs,
    ):
        """
        Args:
            c_t:
                Template embedding channel dimension
            c_hidden_tri_att:
                Per-head hidden dimension for triangular attention
            c_hidden_tri_mul:
                Hidden dimension for triangular multiplication
            no_blocks:
                Number of blocks in the stack
            no_heads:
                Number of triangular attention heads
            pair_transition_n:
                Scale of pair transition (Alg. 15) hidden dimension
            dropout_rate:
                Dropout rate used throughout the stack
            blocks_per_ckpt:
                Number of blocks per activation checkpoint. None disables
                activation checkpointing
            inf:
                Large number forwarded to every block
            **kwargs:
                Accepted and ignored
        """
        super(TemplatePairStack, self).__init__()

        self.blocks_per_ckpt = blocks_per_ckpt

        self.blocks = nn.ModuleList()
        for _ in range(no_blocks):
            block = TemplatePairStackBlock(
                c_t=c_t,
                c_hidden_tri_att=c_hidden_tri_att,
                c_hidden_tri_mul=c_hidden_tri_mul,
                no_heads=no_heads,
                pair_transition_n=pair_transition_n,
                dropout_rate=dropout_rate,
                inf=inf,
            )
            self.blocks.append(block)

        self.layer_norm = LayerNorm(c_t)

    def forward(
        self,
        t: torch.Tensor,
        mask: torch.Tensor,
        chunk_size: Optional[int],
        _mask_trans: bool = True,
    ):
        """
        Args:
            t:
                [*, N_templ, N_res, N_res, C_t] template embedding
            mask:
                [*, N_templ, N_res, N_res] mask
            chunk_size:
                Chunk size forwarded to every block (may be None)
            _mask_trans:
                Forwarded to every block
        Returns:
            [*, N_templ, N_res, N_res, C_t] template embedding update
        """
        # A mask with a singleton template dimension is broadcast to the
        # number of templates in t so that it can be unbound per template.
        if mask.shape[-3] == 1:
            expand_idx = list(mask.shape)
            expand_idx[-3] = t.shape[-4]
            mask = mask.expand(*expand_idx)

        t, = checkpoint_blocks(
            blocks=[
                partial(
                    b,
                    mask=mask,
                    chunk_size=chunk_size,
                    _mask_trans=_mask_trans,
                )
                for b in self.blocks
            ],
            args=(t,),
            # Checkpointing is only requested while training
            blocks_per_ckpt=self.blocks_per_ckpt if self.training else None,
        )

        t = self.layer_norm(t)

        return t
def script_preset_(model: torch.nn.Module):
    """
    TorchScript a handful of low-level but frequently used submodule types
    that are known to be scriptable.

    Args:
        model:
            A torch.nn.Module. It should contain at least some modules from
            this repository, or this function won't do anything.
    """
    script_submodules_(
        model,
        [
            nn.Dropout,
            Attention,
            GlobalAttention,
            EvoformerBlock,
            #TemplatePairStackBlock,
        ],
        attempt_trace=False,
        batch_dims=None,
    )


def _get_module_device(module: torch.nn.Module) -> torch.device:
    """
    Fetches the device of a module, assuming that all of the module's
    parameters reside on a single device

    Args:
        module: A torch.nn.Module
    Returns:
        The device of the module's first parameter. If the module has no
        parameters, the device of its first buffer; if it has neither,
        the CPU device.
    """
    # next() with a default avoids leaking a bare StopIteration for
    # modules without parameters.
    param = next(module.parameters(), None)
    if param is not None:
        return param.device

    buffer = next(module.buffers(), None)
    if buffer is not None:
        return buffer.device

    return torch.device("cpu")


def _trace_module(module, batch_dims=None):
    """
    Trace a single module with randomly generated stand-in inputs.

    Args:
        module:
            An instance of MSARowAttentionWithPairBias, MSAColumnAttention
            or OuterProductMean
        batch_dims:
            Optional tuple of leading batch dimensions for the stand-in
            inputs. None means no batch dimensions
    Returns:
        The result of torch.jit.trace_module on the module
    Raises:
        TypeError: if the module is of any other type
    """
    if batch_dims is None:
        batch_dims = ()

    # Stand-in values
    n_seq = 10
    n_res = 10

    device = _get_module_device(module)

    def msa(channel_dim):
        return torch.rand(
            (*batch_dims, n_seq, n_res, channel_dim),
            device=device,
        )

    def pair(channel_dim):
        return torch.rand(
            (*batch_dims, n_res, n_res, channel_dim),
            device=device,
        )

    def msa_mask():
        # Created on the module's device, like the other example inputs,
        # so that tracing a non-CPU module does not mix devices.
        return torch.randint(
            0, 2,
            (*batch_dims, n_seq, n_res),
            device=device,
        )

    if isinstance(module, MSARowAttentionWithPairBias):
        inputs = {
            "forward": (
                msa(module.c_in),  # m
                pair(module.c_z),  # z
                msa_mask(),  # mask
            ),
        }
    elif isinstance(module, MSAColumnAttention):
        inputs = {
            "forward": (
                msa(module.c_in),  # m
                msa_mask(),  # mask
            ),
        }
    elif isinstance(module, OuterProductMean):
        inputs = {
            "forward": (
                msa(module.c_m),
                msa_mask(),
            )
        }
    else:
        raise TypeError(
            f"tracing is not supported for modules of type {type(module)}"
        )

    return torch.jit.trace_module(module, inputs)


def _script_submodules_helper_(
    model,
    types,
    attempt_trace,
    to_trace,
):
    """
    Recursively replace matching children of ``model`` with scripted
    versions, in place.

    Args:
        model:
            Module whose children are visited
        types:
            Types of submodules to script. None matches every child
        attempt_trace:
            If True, a scripting failure is recorded in ``to_trace``
            instead of being raised
        to_trace:
            Set that collects the types of children that failed to script
    """
    for name, child in model.named_children():
        if types is None or any(isinstance(child, t) for t in types):
            try:
                scripted = torch.jit.script(child)
                setattr(model, name, scripted)
                continue
            except (RuntimeError, torch.jit.frontend.NotSupportedError):
                if attempt_trace:
                    to_trace.add(type(child))
                else:
                    raise

        # Not scripted (no match, or scripting failed): descend so that
        # matching grandchildren still get a chance.
        _script_submodules_helper_(child, types, attempt_trace, to_trace)


def _trace_submodules_(
    model,
    types,
    batch_dims=None,
):
    """
    Recursively replace children of ``model`` whose type is in ``types``
    with traced versions, in place.

    Args:
        model:
            Module whose children are visited
        types:
            Iterable of types to trace
        batch_dims:
            Forwarded to _trace_module
    """
    for name, child in model.named_children():
        if any(isinstance(child, t) for t in types):
            traced = _trace_module(child, batch_dims=batch_dims)
            setattr(model, name, traced)
        else:
            _trace_submodules_(child, types, batch_dims=batch_dims)


def script_submodules_(
    model: nn.Module,
    types: Optional[Sequence[type]] = None,
    attempt_trace: Optional[bool] = True,
    batch_dims: Optional[Tuple[int]] = None,
):
    """
    Convert all submodules whose types match one of those in the input
    list to recursively scripted equivalents in place. To script the entire
    model, just call torch.jit.script on it directly.

    When types is None, all submodules are scripted.

    Args:
        model:
            A torch.nn.Module
        types:
            A list of types of submodules to script
        attempt_trace:
            Whether to attempt to trace specified modules if scripting
            fails. Recall that tracing eliminates all conditional
            logic---with great tracing comes the mild responsibility of
            having to remember to ensure that the modules in question
            perform the same computations no matter what.
        batch_dims:
            Leading batch dimensions of the stand-in inputs used when
            tracing. Only relevant if attempt_trace is set
    """
    to_trace = set()

    # Aggressively script as much as possible first...
    _script_submodules_helper_(model, types, attempt_trace, to_trace)

    # ... and then trace stragglers.
    if attempt_trace and len(to_trace) > 0:
        _trace_submodules_(model, to_trace, batch_dims=batch_dims)
class TriangleAttention(nn.Module):
    """
    Shared implementation of triangular self-attention. The ``starting``
    flag selects whether the input is used as given or with its two
    pair dimensions swapped.
    """
    def __init__(self, c_in, c_hidden, no_heads, starting, inf=1e9):
        """
        Args:
            c_in:
                Input channel dimension
            c_hidden:
                Overall hidden channel dimension (not per-head)
            no_heads:
                Number of attention heads
            starting:
                If False, the I and J dimensions of the input (and of the
                mask) are transposed before attention and the output is
                transposed back
            inf:
                Large number used to build the additive mask bias
        """
        super().__init__()

        self.c_in = c_in
        self.c_hidden = c_hidden
        self.no_heads = no_heads
        self.starting = starting
        self.inf = inf

        self.layer_norm = LayerNorm(self.c_in)

        # Projects every pair entry to one bias value per head
        self.linear = Linear(c_in, self.no_heads, bias=False, init="normal")

        self.mha = Attention(
            self.c_in, self.c_in, self.c_in, self.c_hidden, self.no_heads
        )

    @torch.jit.ignore
    def _chunk(
        self,
        x: torch.Tensor,
        biases: List[torch.Tensor],
        chunk_size: int,
    ) -> torch.Tensor:
        """
        Run the self-attention of ``x`` through ``chunk_layer`` instead of
        calling ``self.mha`` directly.
        """
        attention_inputs = {"q_x": x, "kv_x": x, "biases": biases}
        # Everything except the final two dimensions is a batch dim
        batch_dim_count = len(x.shape[:-2])
        return chunk_layer(
            partial(self.mha),
            attention_inputs,
            chunk_size=chunk_size,
            no_batch_dims=batch_dim_count,
        )

    def forward(
        self,
        x: torch.Tensor,
        mask: Optional[torch.Tensor] = None,
        chunk_size: Optional[int] = None,
    ) -> torch.Tensor:
        """
        Args:
            x:
                [*, I, J, C_in] input tensor (e.g. the pair representation)
            mask:
                [*, I, J] mask. Defaults to all ones
            chunk_size:
                If not None, attention is evaluated in chunks of this size
        Returns:
            [*, I, J, C_in] output tensor
        """
        if mask is None:
            # [*, I, J]
            mask = x.new_ones(x.shape[:-1])

        # Shape annotations assume self.starting. Else, I and J are flipped
        if not self.starting:
            x = x.transpose(-2, -3)
            mask = mask.transpose(-1, -2)

        # [*, I, J, C_in]
        x = self.layer_norm(x)

        # [*, I, J]: 0 where the mask is 1, -inf where it is 0
        mask_bias = self.inf * (mask - 1)
        # [*, I, 1, 1, J]
        mask_bias = mask_bias[..., :, None, None, :]

        # [*, H, I, J]
        triangle_bias = permute_final_dims(self.linear(x), (2, 0, 1))
        # [*, 1, H, I, J]
        triangle_bias = triangle_bias.unsqueeze(-4)

        biases = [mask_bias, triangle_bias]

        if chunk_size is not None:
            x = self._chunk(x, biases, chunk_size)
        else:
            x = self.mha(q_x=x, kv_x=x, biases=biases)

        # Undo the transposition applied above
        if not self.starting:
            x = x.transpose(-2, -3)

        return x


class TriangleAttentionStartingNode(TriangleAttention):
    """
    Implements Algorithm 13.

    TriangleAttention with ``starting`` fixed to True.
    """

    __init__ = partialmethod(TriangleAttention.__init__, starting=True)


class TriangleAttentionEndingNode(TriangleAttention):
    """
    Implements Algorithm 14.

    TriangleAttention with ``starting`` fixed to False.
    """

    __init__ = partialmethod(TriangleAttention.__init__, starting=False)
+ +from functools import partialmethod +from typing import Optional + +import torch +import torch.nn as nn + +from openfold.model.primitives import Linear, LayerNorm +from openfold.utils.tensor_utils import permute_final_dims + + +class TriangleMultiplicativeUpdate(nn.Module): + """ + Implements Algorithms 11 and 12. + """ + def __init__(self, c_z, c_hidden, _outgoing=True): + """ + Args: + c_z: + Input channel dimension + c: + Hidden channel dimension + """ + super(TriangleMultiplicativeUpdate, self).__init__() + self.c_z = c_z + self.c_hidden = c_hidden + self._outgoing = _outgoing + + self.linear_a_p = Linear(self.c_z, self.c_hidden) + self.linear_a_g = Linear(self.c_z, self.c_hidden, init="gating") + self.linear_b_p = Linear(self.c_z, self.c_hidden) + self.linear_b_g = Linear(self.c_z, self.c_hidden, init="gating") + self.linear_g = Linear(self.c_z, self.c_z, init="gating") + self.linear_z = Linear(self.c_hidden, self.c_z, init="final") + + self.layer_norm_in = LayerNorm(self.c_z) + self.layer_norm_out = LayerNorm(self.c_hidden) + + self.sigmoid = nn.Sigmoid() + + def _combine_projections(self, + a: torch.Tensor, + b: torch.Tensor, + ) -> torch.Tensor: + raise NotImplementedError("This method needs to be overridden") + + def forward(self, + z: torch.Tensor, + mask: Optional[torch.Tensor] = None + ) -> torch.Tensor: + """ + Args: + x: + [*, N_res, N_res, C_z] input tensor + mask: + [*, N_res, N_res] input mask + Returns: + [*, N_res, N_res, C_z] output tensor + """ + if mask is None: + mask = z.new_ones(z.shape[:-1]) + + mask = mask.unsqueeze(-1) + + z = self.layer_norm_in(z) + a = self.linear_a_p(z) * self.sigmoid(self.linear_a_g(z)) + a = a * mask + b = self.linear_b_p(z) * self.sigmoid(self.linear_b_g(z)) + b = b * mask + x = self._combine_projections(a, b) + x = self.layer_norm_out(x) + x = self.linear_z(x) + g = self.sigmoid(self.linear_g(z)) + z = x * g + + return z + + +class TriangleMultiplicationOutgoing(TriangleMultiplicativeUpdate): + """ + Implements 
class TriangleMultiplicationIncoming(TriangleMultiplicativeUpdate):
    """
    Implements Algorithm 12.
    """
    def _combine_projections(self,
        a: torch.Tensor,  # [*, N_k, N_i, C]
        b: torch.Tensor,  # [*, N_k, N_j, C]
    ):
        # Rearrange both operands so a single batched matmul performs the
        # contraction over the shared N_k axis.
        lhs = permute_final_dims(a, (2, 1, 0))
        rhs = permute_final_dims(b, (2, 0, 1))

        # [*, C, N_i, N_j]
        product = torch.matmul(lhs, rhs)

        # [*, N_i, N_j, C]
        return permute_final_dims(product, (1, 2, 0))
@dataclasses.dataclass(frozen=True)
class Protein:
    """Protein structure representation.

    Immutable (frozen) record of per-residue arrays plus optional metadata
    that is emitted as REMARK / PARENT header lines when the protein is
    written out as PDB text. The five array fields are required; the
    remaining fields default to None.
    """

    # Cartesian coordinates of atoms in angstroms. The atom types (second
    # axis) correspond to residue_constants.atom_types.
    # NOTE(review): this comment used to say "the first three are N, CA, CB";
    # confirm the actual ordering against residue_constants.atom_types.
    atom_positions: np.ndarray  # [num_res, num_atom_type, 3]

    # Amino-acid type for each residue represented as an integer between 0 and
    # 20, where 20 is 'X'.
    aatype: np.ndarray  # [num_res]

    # Binary float mask to indicate presence of a particular atom. 1.0 if an atom
    # is present and 0.0 if not. This should be used for loss masking.
    atom_mask: np.ndarray  # [num_res, num_atom_type]

    # Residue index as used in PDB. It is not necessarily continuous or 0-indexed.
    residue_index: np.ndarray  # [num_res]

    # B-factors, or temperature factors, of each residue (in sq. angstroms units),
    # representing the displacement of the residue from its ground truth mean
    # value.
    b_factors: np.ndarray  # [num_res, num_atom_type]

    # Chain indices for multi-chain predictions. Integer per residue; the PDB
    # writer in this module uses it to index into string.ascii_uppercase to
    # obtain the chain letter.
    chain_index: Optional[np.ndarray] = None

    # Optional remark about the protein. Included as a comment in output PDB
    # files
    remark: Optional[str] = None

    # Templates used to generate this protein (prediction-only)
    parents: Optional[Sequence[str]] = None

    # Chain corresponding to each parent (parallel to `parents`)
    parents_chain_index: Optional[Sequence[int]] = None
def from_proteinnet_string(proteinnet_str: str) -> Protein:
    """Builds a `Protein` from a ProteinNet-style text record.

    The record is split on bracketed upper-case tags (e.g. "[PRIMARY]"),
    and three sections are interpreted:

      * [PRIMARY]:  one line holding the one-letter sequence. Symbols that
        are not in `residue_constants.restypes` are treated as 'X'.
      * [TERTIARY]: three lines (one per coordinate axis) of whitespace
        separated floats, interleaved per residue as N, CA, C. Values are
        scaled by PICO_TO_ANGSTROM.
      * [MASK]:     one line of '+' / '-' characters, one per residue.

    Only the N, CA and C slots of the position and mask arrays are filled.

    Args:
      proteinnet_str: The text of a single ProteinNet record.

    Returns:
      A `Protein` with `residue_index` set to 0..num_res-1 and
      `b_factors` set to None (the record carries no B-factors).
    """
    tag_re = r'(\[[A-Z]+\]\n)'
    tags = [
        tag.strip() for tag in re.split(tag_re, proteinnet_str) if len(tag) > 0
    ]
    # The capture group makes re.split keep the tags, so the list alternates
    # tag, body, tag, body, ...
    groups = zip(tags[0::2], [l.split('\n') for l in tags[1::2]])

    atoms = ['N', 'CA', 'C']
    aatype = None
    atom_positions = None
    atom_mask = None
    for g in groups:
        if("[PRIMARY]" == g[0]):
            # str is immutable, so unknown symbols are replaced while
            # building a new string instead of assigning into the old one
            # (item assignment raised TypeError).
            seq = "".join(
                res_symbol if res_symbol in residue_constants.restypes else 'X'
                for res_symbol in g[1][0].strip()
            )
            aatype = np.array([
                residue_constants.restype_order.get(
                    res_symbol, residue_constants.restype_num
                ) for res_symbol in seq
            ])
        elif("[TERTIARY]" == g[0]):
            tertiary = []
            for axis in range(3):
                tertiary.append(list(map(float, g[1][axis].split())))
            tertiary_np = np.array(tertiary)
            # Three backbone atoms per residue on every axis line.
            atom_positions = np.zeros(
                (len(tertiary[0])//3, residue_constants.atom_type_num, 3)
            ).astype(np.float32)
            for i, atom in enumerate(atoms):
                atom_positions[:, residue_constants.atom_order[atom], :] = (
                    np.transpose(tertiary_np[:, i::3])
                )
            atom_positions *= PICO_TO_ANGSTROM
        elif("[MASK]" == g[0]):
            mask = np.array(list(map({'-': 0, '+': 1}.get, g[1][0].strip())))
            atom_mask = np.zeros(
                (len(mask), residue_constants.atom_type_num,)
            ).astype(np.float32)
            for i, atom in enumerate(atoms):
                atom_mask[:, residue_constants.atom_order[atom]] = 1
            # Zero out the backbone slots of residues flagged '-'.
            atom_mask *= mask[..., None]

    return Protein(
        atom_positions=atom_positions,
        atom_mask=atom_mask,
        aatype=aatype,
        residue_index=np.arange(len(aatype)),
        b_factors=None,
    )
Useful during multi-chain + recycling + """ + out_pdb_lines = [] + lines = pdb_str.split('\n') + + remark = prot.remark + if(remark is not None): + out_pdb_lines.append(f"REMARK {remark}") + + parents_per_chain = None + if(prot.parents is not None and len(prot.parents) > 0): + parents_per_chain = [] + if(prot.parents_chain_index is not None): + cur_chain = prot.parents_chain_index[0] + parent_dict = {} + for p, i in zip(prot.parents, prot.parents_chain_index): + parent_dict.setdefault(str(i), []) + parent_dict[str(i)].append(p) + + max_idx = max([int(chain_idx) for chain_idx in parent_dict]) + for i in range(max_idx + 1): + chain_parents = parent_dict.get(str(i), ["N/A"]) + parents_per_chain.append(chain_parents) + else: + parents_per_chain.append(prot.parents) + else: + parents_per_chain = [["N/A"]] + + make_parent_line = lambda p: f"PARENT {' '.join(p)}" + + out_pdb_lines.append(make_parent_line(parents_per_chain[0])) + + chain_counter = 0 + for i, l in enumerate(lines): + if("PARENT" not in l and "REMARK" not in l): + out_pdb_lines.append(l) + if("TER" in l and not "END" in lines[i + 1]): + chain_counter += 1 + if(not chain_counter >= len(parents_per_chain)): + chain_parents = parents_per_chain[chain_counter] + else: + chain_parents = ["N/A"] + + out_pdb_lines.append(make_parent_line(chain_parents)) + + return '\n'.join(out_pdb_lines) + + +def to_pdb(prot: Protein) -> str: + """Converts a `Protein` instance to a PDB string. + + Args: + prot: The protein to convert to PDB. + + Returns: + PDB string. 
+ """ + restypes = residue_constants.restypes + ["X"] + res_1to3 = lambda r: residue_constants.restype_1to3.get(restypes[r], "UNK") + atom_types = residue_constants.atom_types + + pdb_lines = [] + + atom_mask = prot.atom_mask + aatype = prot.aatype + atom_positions = prot.atom_positions + residue_index = prot.residue_index.astype(int) + b_factors = prot.b_factors + chain_index = prot.chain_index + + if np.any(aatype > residue_constants.restype_num): + raise ValueError("Invalid aatypes.") + + headers = get_pdb_headers(prot) + if(len(headers) > 0): + pdb_lines.extend(headers) + + n = aatype.shape[0] + atom_index = 1 + prev_chain_index = 0 + chain_tags = string.ascii_uppercase + # Add all atom sites. + for i in range(n): + res_name_3 = res_1to3(aatype[i]) + for atom_name, pos, mask, b_factor in zip( + atom_types, atom_positions[i], atom_mask[i], b_factors[i] + ): + if mask < 0.5: + continue + + record_type = "ATOM" + name = atom_name if len(atom_name) == 4 else f" {atom_name}" + alt_loc = "" + insertion_code = "" + occupancy = 1.00 + element = atom_name[ + 0 + ] # Protein supports only C, N, O, S, this works. + charge = "" + + chain_tag = "A" + if(chain_index is not None): + chain_tag = chain_tags[chain_index[i]] + + # PDB is a columnar format, every space matters here! + atom_line = ( + f"{record_type:<6}{atom_index:>5} {name:<4}{alt_loc:>1}" + f"{res_name_3:>3} {chain_tag:>1}" + f"{residue_index[i]:>4}{insertion_code:>1} " + f"{pos[0]:>8.3f}{pos[1]:>8.3f}{pos[2]:>8.3f}" + f"{occupancy:>6.2f}{b_factor:>6.2f} " + f"{element:>2}{charge:>2}" + ) + pdb_lines.append(atom_line) + atom_index += 1 + + should_terminate = (i == n - 1) + if(chain_index is not None): + if(i != n - 1 and chain_index[i + 1] != prev_chain_index): + should_terminate = True + prev_chain_index = chain_index[i + 1] + + if(should_terminate): + # Close the chain. 
def ideal_atom_mask(prot: Protein) -> np.ndarray:
    """Looks up the expected heavy-atom mask for a protein's sequence.

    `Protein.atom_mask` usually records which atoms were actually reported
    in the PDB. This function instead returns, for each residue, the mask of
    heavy atoms that should be present for that amino-acid type.

    Args:
      prot: `Protein` whose fields are `numpy.ndarray` objects.

    Returns:
      An ideal atom mask.
    """
    per_restype_mask = residue_constants.STANDARD_ATOM_MASK
    # Index the per-residue-type table with the integer sequence encoding.
    return per_restype_mask[prot.aatype]
import os
import glob
import importlib as importlib

# Every sibling "*.py" file except this __init__ is treated as a submodule.
_files = glob.glob(os.path.join(os.path.dirname(__file__), "*.py"))
__all__ = [
    os.path.basename(f)[:-3]
    for f in _files
    if os.path.isfile(f) and not f.endswith("__init__.py")
]
# Import each submodule and expose it as a package attribute of the same
# name. Updating globals() from the (name, module) pairs avoids a loop
# variable, so the cleanup below cannot hit an unbound name when the
# package has no submodules.
_modules = [(m, importlib.import_module("." + m, __name__)) for m in __all__]
globals().update(_modules)

# Avoid needlessly cluttering the global namespace
del _files, _modules
def will_restrain(atom: openmm_app.Atom, rset: str) -> bool:
    """Returns True if the atom will be restrained by the given restraint set.

    Args:
      atom: The atom to test.
      rset: Name of the restraint set. "non_hydrogen" selects every atom
        whose element is not hydrogen; "c_alpha" selects atoms named "CA".

    Returns:
      Whether `atom` belongs to `rset`. An unrecognised `rset` selects no
      atoms, so False is returned (previously the function fell off the end
      and returned None, contradicting its `bool` return type).
    """
    if rset == "non_hydrogen":
        return atom.element.name != "hydrogen"
    if rset == "c_alpha":
        return atom.name == "CA"
    return False
def _get_pdb_string(topology: openmm_app.Topology, positions: unit.Quantity):
    """Serialises an OpenMM topology and its positions to PDB text."""
    buffer = io.StringIO()
    try:
        # PDBFile.writeFile writes into any file-like object.
        openmm_app.PDBFile.writeFile(topology, positions, buffer)
        pdb_text = buffer.getvalue()
    finally:
        buffer.close()
    return pdb_text
def _check_residues_are_well_defined(prot: protein.Protein):
    """Raises ValueError if any residue has an all-zero atom mask."""
    # Summing the mask over its last axis gives one total per residue.
    atoms_per_residue = prot.atom_mask.sum(axis=-1)
    has_empty_residue = (atoms_per_residue == 0).any()
    if not has_empty_residue:
        return
    raise ValueError(
        "Amber minimization can only be performed on proteins with"
        " well-defined residues. This protein contains at least"
        " one residue with no atoms."
    )
def make_atom14_positions(prot):
    """Constructs denser atom positions (14 dimensions instead of 37).

    Args:
        prot: A dict-like batch. The keys read here are "aatype",
            "all_atom_mask" and "all_atom_positions".
            # assumes these are numpy arrays indexed by residue along
            # axis 0 -- TODO confirm against callers

    Returns:
        The same `prot` object, updated in place with the keys
        "atom14_atom_exists", "atom14_gt_exists", "atom14_gt_positions",
        "residx_atom14_to_atom37", "residx_atom37_to_atom14",
        "atom37_atom_exists", "atom14_alt_gt_positions",
        "atom14_alt_gt_exists" and "atom14_atom_is_ambiguous".
    """
    # Per-residue-type lookup tables, one row appended per entry of
    # residue_constants.restypes (plus one dummy row for 'UNK' below).
    restype_atom14_to_atom37 = []  # mapping (restype, atom14) --> atom37
    restype_atom37_to_atom14 = []  # mapping (restype, atom37) --> atom14
    restype_atom14_mask = []

    for rt in residue_constants.restypes:
        atom_names = residue_constants.restype_name_to_atom14_names[
            residue_constants.restype_1to3[rt]
        ]

        # Empty atom names (unused atom14 slots) map to index 0.
        restype_atom14_to_atom37.append(
            [
                (residue_constants.atom_order[name] if name else 0)
                for name in atom_names
            ]
        )

        atom_name_to_idx14 = {name: i for i, name in enumerate(atom_names)}
        # Atom types this residue does not have also map to index 0.
        restype_atom37_to_atom14.append(
            [
                (atom_name_to_idx14[name] if name in atom_name_to_idx14 else 0)
                for name in residue_constants.atom_types
            ]
        )

        # 1.0 for slots holding a named atom, 0.0 for unused slots.
        restype_atom14_mask.append(
            [(1.0 if name else 0.0) for name in atom_names]
        )

    # Add dummy mapping for restype 'UNK'.
    restype_atom14_to_atom37.append([0] * 14)
    restype_atom37_to_atom14.append([0] * 37)
    restype_atom14_mask.append([0.0] * 14)

    restype_atom14_to_atom37 = np.array(
        restype_atom14_to_atom37, dtype=int
    )
    restype_atom37_to_atom14 = np.array(
        restype_atom37_to_atom14, dtype=int
    )
    restype_atom14_mask = np.array(restype_atom14_mask, dtype=np.float32)

    # Create the mapping for (residx, atom14) --> atom37, i.e. an array
    # with shape (num_res, 14) containing the atom37 indices for this protein.
    residx_atom14_to_atom37 = restype_atom14_to_atom37[prot["aatype"]]
    residx_atom14_mask = restype_atom14_mask[prot["aatype"]]

    # Create a mask for known ground truth positions.
    residx_atom14_gt_mask = residx_atom14_mask * np.take_along_axis(
        prot["all_atom_mask"], residx_atom14_to_atom37, axis=1
    ).astype(np.float32)

    # Gather the ground truth positions.
    residx_atom14_gt_positions = residx_atom14_gt_mask[:, :, None] * (
        np.take_along_axis(
            prot["all_atom_positions"],
            residx_atom14_to_atom37[..., None],
            axis=1,
        )
    )

    prot["atom14_atom_exists"] = residx_atom14_mask
    prot["atom14_gt_exists"] = residx_atom14_gt_mask
    prot["atom14_gt_positions"] = residx_atom14_gt_positions

    prot["residx_atom14_to_atom37"] = residx_atom14_to_atom37.astype(np.int64)

    # Create the gather indices for mapping back.
    residx_atom37_to_atom14 = restype_atom37_to_atom14[prot["aatype"]]
    prot["residx_atom37_to_atom14"] = residx_atom37_to_atom14.astype(np.int64)

    # Create the corresponding mask.
    # NOTE(review): 21 x 37 presumably means (restypes + 'UNK') x atom types;
    # the last row is never written below and so stays all zeros.
    restype_atom37_mask = np.zeros([21, 37], dtype=np.float32)
    for restype, restype_letter in enumerate(residue_constants.restypes):
        restype_name = residue_constants.restype_1to3[restype_letter]
        atom_names = residue_constants.residue_atoms[restype_name]
        for atom_name in atom_names:
            atom_type = residue_constants.atom_order[atom_name]
            restype_atom37_mask[restype, atom_type] = 1

    residx_atom37_mask = restype_atom37_mask[prot["aatype"]]
    prot["atom37_atom_exists"] = residx_atom37_mask

    # As the atom naming is ambiguous for 7 of the 20 amino acids, provide
    # alternative ground truth coordinates where the naming is swapped
    restype_3 = [
        residue_constants.restype_1to3[res]
        for res in residue_constants.restypes
    ]
    restype_3 += ["UNK"]

    # Matrices for renaming ambiguous atoms.
    # Every residue type starts with the identity (no renaming); types listed
    # in residue_atom_renaming_swaps are overwritten with a permutation
    # matrix that exchanges each swapped pair of atom14 slots.
    all_matrices = {res: np.eye(14, dtype=np.float32) for res in restype_3}
    for resname, swap in residue_constants.residue_atom_renaming_swaps.items():
        correspondences = np.arange(14)
        for source_atom_swap, target_atom_swap in swap.items():
            source_index = residue_constants.restype_name_to_atom14_names[
                resname
            ].index(source_atom_swap)
            target_index = residue_constants.restype_name_to_atom14_names[
                resname
            ].index(target_atom_swap)
            correspondences[source_index] = target_index
            correspondences[target_index] = source_index
            # Rebuilt after every swap; the matrix from the final pass is
            # the one stored below.
            renaming_matrix = np.zeros((14, 14), dtype=np.float32)
            for index, correspondence in enumerate(correspondences):
                renaming_matrix[index, correspondence] = 1.0
        all_matrices[resname] = renaming_matrix.astype(np.float32)
    renaming_matrices = np.stack(
        [all_matrices[restype] for restype in restype_3]
    )

    # Pick the transformation matrices for the given residue sequence
    # shape (num_res, 14, 14).
    renaming_transform = renaming_matrices[prot["aatype"]]

    # Apply it to the ground truth positions. shape (num_res, 14, 3).
    alternative_gt_positions = np.einsum(
        "rac,rab->rbc", residx_atom14_gt_positions, renaming_transform
    )
    prot["atom14_alt_gt_positions"] = alternative_gt_positions

    # Create the mask for the alternative ground truth (differs from the
    # ground truth mask, if only one of the atoms in an ambiguous pair has a
    # ground truth position).
    alternative_gt_mask = np.einsum(
        "ra,rab->rb", residx_atom14_gt_mask, renaming_transform
    )

    prot["atom14_alt_gt_exists"] = alternative_gt_mask

    # Create an ambiguous atoms mask.  shape: (21, 14).
    restype_atom14_is_ambiguous = np.zeros((21, 14), dtype=np.float32)
    for resname, swap in residue_constants.residue_atom_renaming_swaps.items():
        for atom_name1, atom_name2 in swap.items():
            restype = residue_constants.restype_order[
                residue_constants.restype_3to1[resname]
            ]
            atom_idx1 = residue_constants.restype_name_to_atom14_names[
                resname
            ].index(atom_name1)
            atom_idx2 = residue_constants.restype_name_to_atom14_names[
                resname
            ].index(atom_name2)
            # Both members of a swappable pair are flagged.
            restype_atom14_is_ambiguous[restype, atom_idx1] = 1
            restype_atom14_is_ambiguous[restype, atom_idx2] = 1

    # From this create an ambiguous_mask for the given sequence.
    prot["atom14_atom_is_ambiguous"] = restype_atom14_is_ambiguous[
        prot["aatype"]
    ]

    return prot
def _run_one_iteration(
    *,
    pdb_string: str,
    max_iterations: int,
    tolerance: float,
    stiffness: float,
    restraint_set: str,
    max_attempts: int,
    exclude_residues: Optional[Collection[int]] = None,
    use_gpu: bool,
):
    """Runs the minimization pipeline.

    The minimization is retried until it succeeds or `max_attempts` is
    exhausted; any exception raised by an attempt is logged and triggers
    the next attempt.

    Args:
      pdb_string: A pdb string.
      max_iterations: An `int` specifying the maximum number of L-BFGS iterations.
        A value of 0 specifies no limit.
      tolerance: kcal/mol, the energy tolerance of L-BFGS.
      stiffness: kcal/mol A**2, spring constant of heavy atom restraining
        potential.
      restraint_set: The set of atoms to restrain.
      max_attempts: The maximum number of minimization attempts.
      exclude_residues: An optional list of zero-indexed residues to exclude from
        restraints.
      use_gpu: Whether to run relaxation on GPU
    Returns:
      A `dict` of minimization info, extended with "opt_time" (wall-clock
      seconds spent across all attempts) and "min_attempts" (number of
      attempts made).
    Raises:
      ValueError: If no attempt succeeded. The exception raised by the last
        attempt, if any, is chained as the cause.
    """
    exclude_residues = exclude_residues or []

    # Assign physical dimensions.
    tolerance = tolerance * ENERGY
    stiffness = stiffness * ENERGY / (LENGTH ** 2)

    start = time.perf_counter()
    minimized = False
    attempts = 0
    last_error = None
    while not minimized and attempts < max_attempts:
        attempts += 1
        try:
            logging.info(
                "Minimizing protein, attempt %d of %d.", attempts, max_attempts
            )
            ret = _openmm_minimize(
                pdb_string,
                max_iterations=max_iterations,
                tolerance=tolerance,
                stiffness=stiffness,
                restraint_set=restraint_set,
                exclude_residues=exclude_residues,
                use_gpu=use_gpu,
            )
            minimized = True
        except Exception as e:  # pylint: disable=broad-except
            # Best-effort retry: remember the failure so it can be attached
            # to the final error, and report it through the logger instead
            # of printing to stdout.
            last_error = e
            logging.warning(
                "Minimization attempt %d of %d failed: %s",
                attempts,
                max_attempts,
                e,
            )
    if not minimized:
        raise ValueError(
            f"Minimization failed after {max_attempts} attempts."
        ) from last_error
    ret["opt_time"] = time.perf_counter() - start
    ret["min_attempts"] = attempts
    return ret
+ restraint_set: The set of atoms to restrain. + max_attempts: The maximum number of minimization attempts per iteration. + checks: Whether to perform cleaning checks. + exclude_residues: An optional list of zero-indexed residues to exclude from + restraints. + + Returns: + out: A dictionary of output values. + """ + + # `protein.to_pdb` will strip any poorly-defined residues so we need to + # perform this check before `clean_protein`. + _check_residues_are_well_defined(prot) + pdb_string = clean_protein(prot, checks=checks) + + exclude_residues = exclude_residues or [] + exclude_residues = set(exclude_residues) + violations = np.inf + iteration = 0 + + while violations > 0 and iteration < max_outer_iterations: + ret = _run_one_iteration( + pdb_string=pdb_string, + exclude_residues=exclude_residues, + max_iterations=max_iterations, + tolerance=tolerance, + stiffness=stiffness, + restraint_set=restraint_set, + max_attempts=max_attempts, + use_gpu=use_gpu, + ) + prot = protein.from_pdb_string(ret["min_pdb"]) + if place_hydrogens_every_iteration: + pdb_string = clean_protein(prot, checks=True) + else: + pdb_string = ret["min_pdb"] + ret.update(get_violation_metrics(prot)) + ret.update( + { + "num_exclusions": len(exclude_residues), + "iteration": iteration, + } + ) + violations = ret["violations_per_residue"] + exclude_residues = exclude_residues.union(ret["residue_violations"]) + + logging.info( + "Iteration completed: Einit %.2f Efinal %.2f Time %.2f s " + "num residue violations %d num residue exclusions %d ", + ret["einit"], + ret["efinal"], + ret["opt_time"], + ret["num_residue_violations"], + ret["num_exclusions"], + ) + iteration += 1 + return ret + + +def get_initial_energies( + pdb_strs: Sequence[str], + stiffness: float = 0.0, + restraint_set: str = "non_hydrogen", + exclude_residues: Optional[Sequence[int]] = None, +): + """Returns initial potential energies for a sequence of PDBs. 
+ + Assumes the input PDBs are ready for minimization, and all have the same + topology. + Allows time to be saved by not pdbfixing / rebuilding the system. + + Args: + pdb_strs: List of PDB strings. + stiffness: kcal/mol A**2, spring constant of heavy atom restraining + potential. + restraint_set: Which atom types to restrain. + exclude_residues: An optional list of zero-indexed residues to exclude from + restraints. + + Returns: + A list of initial energies in the same order as pdb_strs. + """ + exclude_residues = exclude_residues or [] + + openmm_pdbs = [ + openmm_app.PDBFile(PdbStructure(io.StringIO(p))) for p in pdb_strs + ] + force_field = openmm_app.ForceField("amber99sb.xml") + system = force_field.createSystem( + openmm_pdbs[0].topology, constraints=openmm_app.HBonds + ) + stiffness = stiffness * ENERGY / (LENGTH ** 2) + if stiffness > 0 * ENERGY / (LENGTH ** 2): + _add_restraints( + system, openmm_pdbs[0], stiffness, restraint_set, exclude_residues + ) + simulation = openmm_app.Simulation( + openmm_pdbs[0].topology, + system, + openmm.LangevinIntegrator(0, 0.01, 0.0), + openmm.Platform.getPlatformByName("CPU"), + ) + energies = [] + for pdb in openmm_pdbs: + try: + simulation.context.setPositions(pdb.positions) + state = simulation.context.getState(getEnergy=True) + energies.append(state.getPotentialEnergy().value_in_unit(ENERGY)) + except Exception as e: # pylint: disable=broad-except + logging.error( + "Error getting initial energy, returning large value %s", e + ) + energies.append(unit.Quantity(1e20, ENERGY)) + return energies diff --git a/openfold/np/relax/cleanup.py b/openfold/np/relax/cleanup.py new file mode 100644 index 0000000000000000000000000000000000000000..a1f59a48b3b2184c68ccbbce9048b6a137086ca4 --- /dev/null +++ b/openfold/np/relax/cleanup.py @@ -0,0 +1,131 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the 
# License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Cleans up a PDB file using pdbfixer in preparation for OpenMM simulations.

fix_pdb uses a third-party tool. We also support fixing some additional edge
cases like removing chains of length one (see clean_structure).
"""
import io

import pdbfixer
# Import from the `openmm` namespace rather than the deprecated `simtk.openmm`
# alias, matching the imports used by openfold/np/relax/utils.py.
from openmm import app
from openmm.app import element


def fix_pdb(pdbfile, alterations_info):
    """Apply pdbfixer to the contents of a PDB file; return a PDB string result.

    1) Replaces nonstandard residues.
    2) Removes heterogens (non protein residues) including water.
    3) Adds missing residues and missing atoms within existing residues.
    4) Adds hydrogens assuming pH=7.0.
    5) KeepIds is currently true, so the fixer must keep the existing chain and
       residue identifiers. This will fail for some files in wider PDB that have
       invalid IDs.

    Args:
      pdbfile: Input PDB file handle.
      alterations_info: A dict that will store details of changes made. The keys
        written here are "nonstandard_residues", "removed_heterogens",
        "missing_residues", "missing_heavy_atoms" and "missing_terminals".

    Returns:
      A PDB string representing the fixed structure.
    """
    fixer = pdbfixer.PDBFixer(pdbfile=pdbfile)

    # Each find*() call populates an attribute on the fixer; record it before
    # the corresponding repair step runs.
    fixer.findNonstandardResidues()
    alterations_info["nonstandard_residues"] = fixer.nonstandardResidues
    fixer.replaceNonstandardResidues()

    _remove_heterogens(fixer, alterations_info, keep_water=False)

    fixer.findMissingResidues()
    alterations_info["missing_residues"] = fixer.missingResidues

    fixer.findMissingAtoms()
    alterations_info["missing_heavy_atoms"] = fixer.missingAtoms
    alterations_info["missing_terminals"] = fixer.missingTerminals

    # A fixed seed is passed so repeated runs use the same seed value.
    fixer.addMissingAtoms(seed=0)
    fixer.addMissingHydrogens()

    out_handle = io.StringIO()
    app.PDBFile.writeFile(
        fixer.topology, fixer.positions, out_handle, keepIds=True
    )
    return out_handle.getvalue()


def clean_structure(pdb_structure, alterations_info):
    """Applies additional fixes to an OpenMM structure, to handle edge cases.

    The structure is modified in place: selenium in unmodified MET residues is
    replaced by sulfur, then single-residue chains are dropped.

    Args:
      pdb_structure: An OpenMM structure to modify and fix.
      alterations_info: A dict that will store details of changes made.
    """
    _replace_met_se(pdb_structure, alterations_info)
    _remove_chains_of_length_one(pdb_structure, alterations_info)


def _residue_names(topology):
    """Returns the set of residue names present in `topology`."""
    return {
        residue.name
        for chain in topology.chains()
        for residue in chain.residues()
    }


def _remove_heterogens(fixer, alterations_info, keep_water):
    """Removes the residues that Pdbfixer considers to be heterogens.

    The names of residues that disappear entirely are stored in
    alterations_info["removed_heterogens"] as a set.

    Args:
      fixer: A Pdbfixer instance.
      alterations_info: A dict that will store details of changes made.
      keep_water: If True, water (HOH) is not considered to be a heterogen.
    """
    initial_resnames = _residue_names(fixer.topology)
    fixer.removeHeterogens(keepWater=keep_water)
    final_resnames = _residue_names(fixer.topology)
    alterations_info["removed_heterogens"] = initial_resnames.difference(
        final_resnames
    )


def _replace_met_se(pdb_structure, alterations_info):
    """Replace the Se in any MET residues that were not marked as modified.

    The residue numbers of the touched atoms are stored as a list in
    alterations_info["Se_in_MET"].

    Args:
      pdb_structure: An OpenMM pdb_structure to modify in place.
      alterations_info: A dict that will store details of changes made.
    """
    modified_met_residues = []
    for res in pdb_structure.iter_residues():
        name = res.get_name_with_spaces().strip()
        if name != "MET":
            continue
        s_atom = res.get_atom("SD")
        if s_atom.element_symbol == "Se":
            s_atom.element_symbol = "S"
            s_atom.element = element.get_by_symbol("S")
            modified_met_residues.append(s_atom.residue_number)
    alterations_info["Se_in_MET"] = modified_met_residues


def _remove_chains_of_length_one(pdb_structure, alterations_info):
    """Removes chains that correspond to a single amino acid.

    A single amino acid in a chain is both N and C terminus. There is no force
    template for this case.

    Args:
      pdb_structure: An OpenMM pdb_structure to modify and fix.
      alterations_info: A dict that will store details of changes made. The
        removed chain ids are stored per model number under "removed_chains".
    """
    removed_chains = {}
    for model in pdb_structure.iter_models():
        # Partition the chains in a single pass before mutating the model.
        valid_chains = []
        invalid_chain_ids = []
        for chain in model.iter_chains():
            if len(chain) > 1:
                valid_chains.append(chain)
            else:
                invalid_chain_ids.append(chain.chain_id)

        model.chains = valid_chains
        for chain_id in invalid_chain_ids:
            model.chains_by_id.pop(chain_id)
        removed_chains[model.number] = invalid_chain_ids
    alterations_info["removed_chains"] = removed_chains


# diff --git a/openfold/np/relax/relax.py b/openfold/np/relax/relax.py new file mode 100644 index 0000000000000000000000000000000000000000..8feee742d033ee3165f5975cb14e13c599bfb364 --- /dev/null +++ b/openfold/np/relax/relax.py @@ -0,0 +1,90 @@
# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Amber relaxation."""
from typing import Any, Dict, Sequence, Tuple
from openfold.np import protein
from openfold.np.relax import amber_minimize, utils
import numpy as np


class AmberRelaxation(object):
    """Amber relaxation.

    Stores the minimization settings once and applies them to each protein
    passed to `process`.
    """

    def __init__(
        self,
        *,
        max_iterations: int,
        tolerance: float,
        stiffness: float,
        exclude_residues: Sequence[int],
        max_outer_iterations: int,
        use_gpu: bool,
    ):
        """Initialize Amber Relaxer.

        Args:
          max_iterations: Maximum number of L-BFGS iterations. 0 means no max.
          tolerance: kcal/mol, the energy tolerance of L-BFGS.
          stiffness: kcal/mol A**2, spring constant of heavy atom restraining
            potential.
          exclude_residues: Residues to exclude from per-atom restraining.
            Zero-indexed.
          max_outer_iterations: Maximum number of violation-informed relax
           iterations. A value of 1 will run the non-iterative procedure used in
           CASP14. Use 20 so that >95% of the bad cases are relaxed. Relax finishes
           as soon as there are no violations, hence in most cases this causes no
           slowdown. In the worst case we do 20 outer iterations.
          use_gpu: Whether to run on GPU
        """

        self._max_iterations = max_iterations
        self._tolerance = tolerance
        self._stiffness = stiffness
        self._exclude_residues = exclude_residues
        self._max_outer_iterations = max_outer_iterations
        self._use_gpu = use_gpu

    def process(
        self, *, prot: protein.Protein
    ) -> Tuple[str, Dict[str, Any], np.ndarray]:
        """Runs Amber relax on a prediction, adds hydrogens, returns PDB string.

        Args:
          prot: The protein to relax.

        Returns:
          A tuple `(min_pdb, debug_data, violations)`: the relaxed structure as
          a PDB string carrying the B-factors of `prot`; a dict with the keys
          "initial_energy", "final_energy", "attempts" and "rmsd"; and the
          "total_per_residue_violations_mask" entry of the pipeline output.
        """
        out = amber_minimize.run_pipeline(
            prot=prot,
            max_iterations=self._max_iterations,
            tolerance=self._tolerance,
            stiffness=self._stiffness,
            exclude_residues=self._exclude_residues,
            max_outer_iterations=self._max_outer_iterations,
            use_gpu=self._use_gpu,
        )
        min_pos = out["pos"]
        start_pos = out["posinit"]
        # Root of the summed squared displacement divided by the length of the
        # first axis (presumably one row per atom -- confirm against
        # amber_minimize).
        rmsd = np.sqrt(np.sum((start_pos - min_pos) ** 2) / start_pos.shape[0])
        debug_data = {
            "initial_energy": out["einit"],
            "final_energy": out["efinal"],
            "attempts": out["min_attempts"],
            "rmsd": rmsd,
        }
        # Rebuild a PDB from the input protein, then overwrite its coordinates
        # with the minimized ones and restore the input B-factors.
        pdb_str = amber_minimize.clean_protein(prot)
        min_pdb = utils.overwrite_pdb_coordinates(pdb_str, min_pos)
        min_pdb = utils.overwrite_b_factors(min_pdb, prot.b_factors)
        utils.assert_equal_nonterminal_atom_types(
            protein.from_pdb_string(min_pdb).atom_mask, prot.atom_mask
        )
        violations = out["structural_violations"][
            "total_per_residue_violations_mask"
        ]
        return min_pdb, debug_data, violations


# diff --git a/openfold/np/relax/utils.py b/openfold/np/relax/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..1e41aea524a97ddd563dc32963b4671f438c7ae4 ---
# /dev/null +++ b/openfold/np/relax/utils.py @@ -0,0 +1,88 @@
# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Utils for minimization."""
import io
from openfold.np import residue_constants
from Bio import PDB
import numpy as np
# simtk.openmm is not supported anymore. Remove simtk.
# https://github.com/openmm/openmm/releases
from openmm import app as openmm_app
from openmm.app.internal.pdbstructure import PdbStructure


def overwrite_pdb_coordinates(pdb_str: str, pos) -> str:
    """Returns `pdb_str` re-serialized with its coordinates replaced by `pos`.

    Args:
      pdb_str: An input PDB string; only its topology is reused.
      pos: The positions to write, passed straight to
        `openmm_app.PDBFile.writeFile` (presumably one entry per atom of the
        parsed topology -- confirm against the caller).

    Returns:
      A new PDB string with the topology of `pdb_str` and the given positions.
    """
    pdb_file = io.StringIO(pdb_str)
    structure = PdbStructure(pdb_file)
    topology = openmm_app.PDBFile(structure).getTopology()
    with io.StringIO() as f:
        openmm_app.PDBFile.writeFile(topology, pos, f)
        return f.getvalue()


def overwrite_b_factors(pdb_str: str, bfactors: np.ndarray) -> str:
    """Overwrites the B-factors in pdb_str with contents of bfactors array.

    Args:
      pdb_str: An input PDB string.
      bfactors: A numpy array with shape [n_residues, 37]. We assume that the
        B-factors are per residue; i.e. that the nonzero entries are identical
        in [i, :]. Only the CA column is read.

    Returns:
      A new PDB string with the B-factors replaced.

    Raises:
      ValueError: If the last dimension of `bfactors` is not
        `residue_constants.atom_type_num`, or if the structure contains more
        residues than `bfactors` has rows.
    """
    if bfactors.shape[-1] != residue_constants.atom_type_num:
        raise ValueError(
            f"Invalid final dimension size for bfactors: {bfactors.shape[-1]}."
        )

    parser = PDB.PDBParser(QUIET=True)
    handle = io.StringIO(pdb_str)
    structure = parser.get_structure("", handle)

    ca_index = residue_constants.atom_order["CA"]
    curr_resid = ("", "", "")
    idx = -1
    for atom in structure.get_atoms():
        atom_resid = atom.parent.get_id()
        # A change of residue id marks the start of the next residue, i.e. the
        # next row of `bfactors`.
        if atom_resid != curr_resid:
            idx += 1
            if idx >= bfactors.shape[0]:
                raise ValueError(
                    "Index into bfactors exceeds number of residues. "
                    f"B-factors shape: {bfactors.shape}, idx: {idx}."
                )
            curr_resid = atom_resid
        atom.bfactor = bfactors[idx, ca_index]

    new_pdb = io.StringIO()
    pdb_io = PDB.PDBIO()
    pdb_io.set_structure(structure)
    pdb_io.save(new_pdb)
    return new_pdb.getvalue()


def assert_equal_nonterminal_atom_types(
    atom_mask: np.ndarray, ref_atom_mask: np.ndarray
):
    """Checks that pre- and post-minimized proteins have same atom set.

    Args:
      atom_mask: Atom mask of the minimized protein; its last axis is indexed
        by `residue_constants.atom_order`.
      ref_atom_mask: Atom mask of the reference protein.

    Raises:
      AssertionError: If the masks differ anywhere outside the OXT column.
    """
    # Ignore any terminal OXT atoms which may have been added by minimization.
    oxt = residue_constants.atom_order["OXT"]
    # Use the builtin `bool`: the `np.bool` alias no longer exists in NumPy.
    no_oxt_mask = np.ones(shape=atom_mask.shape, dtype=bool)
    no_oxt_mask[..., oxt] = False
    np.testing.assert_almost_equal(
        ref_atom_mask[no_oxt_mask], atom_mask[no_oxt_mask]
    )


# diff --git a/openfold/np/residue_constants.py b/openfold/np/residue_constants.py new file mode 100644 index 0000000000000000000000000000000000000000..ca3eae6fe48634e2dee88375c4a38c293305c5b0 --- /dev/null +++ b/openfold/np/residue_constants.py @@ -0,0 +1,1310 @@
# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Constants used in AlphaFold."""

import collections
import functools
from typing import Mapping, List, Tuple
from importlib import resources

import numpy as np
import tree

# Internal import (35fd).


# Distance from one CA to next CA [trans configuration: omega = 180].
ca_ca = 3.80209737096

# Format: The list for each AA type contains chi1, chi2, chi3, chi4 in
# this order (or a relevant subset from chi1 onwards). ALA and GLY don't have
# chi angles so their chi angle lists are empty.
# Each chi angle is given as the list of the four atom names that define it.
chi_angles_atoms = {
    "ALA": [],
    # Chi5 in arginine is always 0 +- 5 degrees, so ignore it.
    "ARG": [
        ["N", "CA", "CB", "CG"],
        ["CA", "CB", "CG", "CD"],
        ["CB", "CG", "CD", "NE"],
        ["CG", "CD", "NE", "CZ"],
    ],
    "ASN": [["N", "CA", "CB", "CG"], ["CA", "CB", "CG", "OD1"]],
    "ASP": [["N", "CA", "CB", "CG"], ["CA", "CB", "CG", "OD1"]],
    "CYS": [["N", "CA", "CB", "SG"]],
    "GLN": [
        ["N", "CA", "CB", "CG"],
        ["CA", "CB", "CG", "CD"],
        ["CB", "CG", "CD", "OE1"],
    ],
    "GLU": [
        ["N", "CA", "CB", "CG"],
        ["CA", "CB", "CG", "CD"],
        ["CB", "CG", "CD", "OE1"],
    ],
    "GLY": [],
    "HIS": [["N", "CA", "CB", "CG"], ["CA", "CB", "CG", "ND1"]],
    "ILE": [["N", "CA", "CB", "CG1"], ["CA", "CB", "CG1", "CD1"]],
    "LEU": [["N", "CA", "CB", "CG"], ["CA", "CB", "CG", "CD1"]],
    "LYS": [
        ["N", "CA", "CB", "CG"],
        ["CA", "CB", "CG", "CD"],
        ["CB", "CG", "CD", "CE"],
        ["CG", "CD", "CE", "NZ"],
    ],
    "MET": [
        ["N", "CA", "CB", "CG"],
        ["CA", "CB", "CG", "SD"],
        ["CB", "CG", "SD", "CE"],
    ],
    "PHE": [["N", "CA", "CB", "CG"], ["CA", "CB", "CG", "CD1"]],
    "PRO": [["N", "CA", "CB", "CG"], ["CA", "CB", "CG", "CD"]],
    "SER": [["N", "CA", "CB", "OG"]],
    "THR": [["N", "CA", "CB", "OG1"]],
    "TRP": [["N", "CA", "CB", "CG"], ["CA", "CB", "CG", "CD1"]],
    "TYR": [["N", "CA", "CB", "CG"], ["CA", "CB", "CG", "CD1"]],
    "VAL": [["N", "CA", "CB", "CG1"]],
}

# If chi angles given in fixed-length array, this matrix determines how to mask
# them for each AA type. The order is as per restype_order (see below).
# NOTE: this table has 20 rows (ALA..VAL) and no trailing UNK row, whereas
# chi_pi_periodic below has 21 rows including UNK.
chi_angles_mask = [
    [0.0, 0.0, 0.0, 0.0],  # ALA
    [1.0, 1.0, 1.0, 1.0],  # ARG
    [1.0, 1.0, 0.0, 0.0],  # ASN
    [1.0, 1.0, 0.0, 0.0],  # ASP
    [1.0, 0.0, 0.0, 0.0],  # CYS
    [1.0, 1.0, 1.0, 0.0],  # GLN
    [1.0, 1.0, 1.0, 0.0],  # GLU
    [0.0, 0.0, 0.0, 0.0],  # GLY
    [1.0, 1.0, 0.0, 0.0],  # HIS
    [1.0, 1.0, 0.0, 0.0],  # ILE
    [1.0, 1.0, 0.0, 0.0],  # LEU
    [1.0, 1.0, 1.0, 1.0],  # LYS
    [1.0, 1.0, 1.0, 0.0],  # MET
    [1.0, 1.0, 0.0, 0.0],  # PHE
    [1.0, 1.0, 0.0, 0.0],  # PRO
    [1.0, 0.0, 0.0, 0.0],  # SER
    [1.0, 0.0, 0.0, 0.0],  # THR
    [1.0, 1.0, 0.0, 0.0],  # TRP
    [1.0, 1.0, 0.0, 0.0],  # TYR
    [1.0, 0.0, 0.0, 0.0],  # VAL
]

# The following chi angles are pi periodic: they can be rotated by a multiple
# of pi without affecting the structure.
# Rows follow the same residue order as chi_angles_mask, plus a final UNK row.
chi_pi_periodic = [
    [0.0, 0.0, 0.0, 0.0],  # ALA
    [0.0, 0.0, 0.0, 0.0],  # ARG
    [0.0, 0.0, 0.0, 0.0],  # ASN
    [0.0, 1.0, 0.0, 0.0],  # ASP
    [0.0, 0.0, 0.0, 0.0],  # CYS
    [0.0, 0.0, 0.0, 0.0],  # GLN
    [0.0, 0.0, 1.0, 0.0],  # GLU
    [0.0, 0.0, 0.0, 0.0],  # GLY
    [0.0, 0.0, 0.0, 0.0],  # HIS
    [0.0, 0.0, 0.0, 0.0],  # ILE
    [0.0, 0.0, 0.0, 0.0],  # LEU
    [0.0, 0.0, 0.0, 0.0],  # LYS
    [0.0, 0.0, 0.0, 0.0],  # MET
    [0.0, 1.0, 0.0, 0.0],  # PHE
    [0.0, 0.0, 0.0, 0.0],  # PRO
    [0.0, 0.0, 0.0, 0.0],  # SER
    [0.0, 0.0, 0.0, 0.0],  # THR
    [0.0, 0.0, 0.0, 0.0],  # TRP
    [0.0, 1.0, 0.0, 0.0],  # TYR
    [0.0, 0.0, 0.0, 0.0],  # VAL
    [0.0, 0.0, 0.0, 0.0],  # UNK
]

# Atoms positions relative to the 8 rigid groups, defined by the pre-omega, phi,
# psi and chi angles:
# 0: 'backbone group',
# 1: 'pre-omega-group', (empty)
# 2: 'phi-group', (currently empty, because it defines only hydrogens)
# 3: 'psi-group',
# 4,5,6,7: 'chi1,2,3,4-group'
# The atom positions are relative to the axis-end-atom of the corresponding
# rotation axis.
# The x-axis is in direction of the rotation axis, and the y-axis
# is defined such that the dihedral-angle-defining atom (the last entry in
# chi_angles_atoms above) is in the xy-plane (with a positive y-coordinate).
# format: [atomname, group_idx, rel_position]
rigid_group_atom_positions = {
    "ALA": [
        ["N", 0, (-0.525, 1.363, 0.000)],
        ["CA", 0, (0.000, 0.000, 0.000)],
        ["C", 0, (1.526, -0.000, -0.000)],
        ["CB", 0, (-0.529, -0.774, -1.205)],
        ["O", 3, (0.627, 1.062, 0.000)],
    ],
    "ARG": [
        ["N", 0, (-0.524, 1.362, -0.000)],
        ["CA", 0, (0.000, 0.000, 0.000)],
        ["C", 0, (1.525, -0.000, -0.000)],
        ["CB", 0, (-0.524, -0.778, -1.209)],
        ["O", 3, (0.626, 1.062, 0.000)],
        ["CG", 4, (0.616, 1.390, -0.000)],
        ["CD", 5, (0.564, 1.414, 0.000)],
        ["NE", 6, (0.539, 1.357, -0.000)],
        ["NH1", 7, (0.206, 2.301, 0.000)],
        ["NH2", 7, (2.078, 0.978, -0.000)],
        ["CZ", 7, (0.758, 1.093, -0.000)],
    ],
    "ASN": [
        ["N", 0, (-0.536, 1.357, 0.000)],
        ["CA", 0, (0.000, 0.000, 0.000)],
        ["C", 0, (1.526, -0.000, -0.000)],
        ["CB", 0, (-0.531, -0.787, -1.200)],
        ["O", 3, (0.625, 1.062, 0.000)],
        ["CG", 4, (0.584, 1.399, 0.000)],
        ["ND2", 5, (0.593, -1.188, 0.001)],
        ["OD1", 5, (0.633, 1.059, 0.000)],
    ],
    "ASP": [
        ["N", 0, (-0.525, 1.362, -0.000)],
        ["CA", 0, (0.000, 0.000, 0.000)],
        ["C", 0, (1.527, 0.000, -0.000)],
        ["CB", 0, (-0.526, -0.778, -1.208)],
        ["O", 3, (0.626, 1.062, -0.000)],
        ["CG", 4, (0.593, 1.398, -0.000)],
        ["OD1", 5, (0.610, 1.091, 0.000)],
        ["OD2", 5, (0.592, -1.101, -0.003)],
    ],
    "CYS": [
        ["N", 0, (-0.522, 1.362, -0.000)],
        ["CA", 0, (0.000, 0.000, 0.000)],
        ["C", 0, (1.524, 0.000, 0.000)],
        ["CB", 0, (-0.519, -0.773, -1.212)],
        ["O", 3, (0.625, 1.062, -0.000)],
        ["SG", 4, (0.728, 1.653, 0.000)],
    ],
    "GLN": [
        ["N", 0, (-0.526, 1.361, -0.000)],
        ["CA", 0, (0.000, 0.000, 0.000)],
        ["C", 0, (1.526, 0.000, 0.000)],
        ["CB", 0, (-0.525, -0.779, -1.207)],
        ["O", 3, (0.626, 1.062, -0.000)],
        ["CG", 4, (0.615, 1.393, 0.000)],
        ["CD", 5, (0.587, 1.399, -0.000)],
        ["NE2", 6, (0.593, -1.189, -0.001)],
        ["OE1", 6, (0.634, 1.060, 0.000)],
    ],
    "GLU": [
        ["N", 0, (-0.528, 1.361, 0.000)],
        ["CA", 0, (0.000, 0.000, 0.000)],
        ["C", 0, (1.526, -0.000, -0.000)],
        ["CB", 0, (-0.526, -0.781, -1.207)],
        ["O", 3, (0.626, 1.062, 0.000)],
        ["CG", 4, (0.615, 1.392, 0.000)],
        ["CD", 5, (0.600, 1.397, 0.000)],
        ["OE1", 6, (0.607, 1.095, -0.000)],
        ["OE2", 6, (0.589, -1.104, -0.001)],
    ],
    "GLY": [
        ["N", 0, (-0.572, 1.337, 0.000)],
        ["CA", 0, (0.000, 0.000, 0.000)],
        ["C", 0, (1.517, -0.000, -0.000)],
        ["O", 3, (0.626, 1.062, -0.000)],
    ],
    "HIS": [
        ["N", 0, (-0.527, 1.360, 0.000)],
        ["CA", 0, (0.000, 0.000, 0.000)],
        ["C", 0, (1.525, 0.000, 0.000)],
        ["CB", 0, (-0.525, -0.778, -1.208)],
        ["O", 3, (0.625, 1.063, 0.000)],
        ["CG", 4, (0.600, 1.370, -0.000)],
        ["CD2", 5, (0.889, -1.021, 0.003)],
        ["ND1", 5, (0.744, 1.160, -0.000)],
        ["CE1", 5, (2.030, 0.851, 0.002)],
        ["NE2", 5, (2.145, -0.466, 0.004)],
    ],
    "ILE": [
        ["N", 0, (-0.493, 1.373, -0.000)],
        ["CA", 0, (0.000, 0.000, 0.000)],
        ["C", 0, (1.527, -0.000, -0.000)],
        ["CB", 0, (-0.536, -0.793, -1.213)],
        ["O", 3, (0.627, 1.062, -0.000)],
        ["CG1", 4, (0.534, 1.437, -0.000)],
        ["CG2", 4, (0.540, -0.785, -1.199)],
        ["CD1", 5, (0.619, 1.391, 0.000)],
    ],
    "LEU": [
        ["N", 0, (-0.520, 1.363, 0.000)],
        ["CA", 0, (0.000, 0.000, 0.000)],
        ["C", 0, (1.525, -0.000, -0.000)],
        ["CB", 0, (-0.522, -0.773, -1.214)],
        ["O", 3, (0.625, 1.063, -0.000)],
        ["CG", 4, (0.678, 1.371, 0.000)],
        ["CD1", 5, (0.530, 1.430, -0.000)],
        ["CD2", 5, (0.535, -0.774, 1.200)],
    ],
    "LYS": [
        ["N", 0, (-0.526, 1.362, -0.000)],
        ["CA", 0, (0.000, 0.000, 0.000)],
        ["C", 0, (1.526, 0.000, 0.000)],
        ["CB", 0, (-0.524, -0.778, -1.208)],
        ["O", 3, (0.626, 1.062, -0.000)],
        ["CG", 4, (0.619, 1.390, 0.000)],
        ["CD", 5, (0.559, 1.417, 0.000)],
        ["CE", 6, (0.560, 1.416, 0.000)],
        ["NZ", 7, (0.554, 1.387, 0.000)],
    ],
    "MET": [
        ["N", 0, (-0.521, 1.364, -0.000)],
        ["CA", 0, (0.000, 0.000, 0.000)],
        ["C", 0, (1.525, 0.000, 0.000)],
        ["CB", 0, (-0.523, -0.776, -1.210)],
        ["O", 3, (0.625, 1.062, -0.000)],
        ["CG", 4, (0.613, 1.391, -0.000)],
        ["SD", 5, (0.703, 1.695, 0.000)],
        ["CE", 6, (0.320, 1.786, -0.000)],
    ],
    "PHE": [
        ["N", 0, (-0.518, 1.363, 0.000)],
        ["CA", 0, (0.000, 0.000, 0.000)],
        ["C", 0, (1.524, 0.000, -0.000)],
        ["CB", 0, (-0.525, -0.776, -1.212)],
        ["O", 3, (0.626, 1.062, -0.000)],
        ["CG", 4, (0.607, 1.377, 0.000)],
        ["CD1", 5, (0.709, 1.195, -0.000)],
        ["CD2", 5, (0.706, -1.196, 0.000)],
        ["CE1", 5, (2.102, 1.198, -0.000)],
        ["CE2", 5, (2.098, -1.201, -0.000)],
        ["CZ", 5, (2.794, -0.003, -0.001)],
    ],
    "PRO": [
        ["N", 0, (-0.566, 1.351, -0.000)],
        ["CA", 0, (0.000, 0.000, 0.000)],
        ["C", 0, (1.527, -0.000, 0.000)],
        ["CB", 0, (-0.546, -0.611, -1.293)],
        ["O", 3, (0.621, 1.066, 0.000)],
        ["CG", 4, (0.382, 1.445, 0.0)],
        # ['CD', 5, (0.427, 1.440, 0.0)],
        ["CD", 5, (0.477, 1.424, 0.0)],  # manually made angle 2 degrees larger
    ],
    "SER": [
        ["N", 0, (-0.529, 1.360, -0.000)],
        ["CA", 0, (0.000, 0.000, 0.000)],
        ["C", 0, (1.525, -0.000, -0.000)],
        ["CB", 0, (-0.518, -0.777, -1.211)],
        ["O", 3, (0.626, 1.062, -0.000)],
        ["OG", 4, (0.503, 1.325, 0.000)],
    ],
    "THR": [
        ["N", 0, (-0.517, 1.364, 0.000)],
        ["CA", 0, (0.000, 0.000, 0.000)],
        ["C", 0, (1.526, 0.000, -0.000)],
        ["CB", 0, (-0.516, -0.793, -1.215)],
        ["O", 3, (0.626, 1.062, 0.000)],
        ["CG2", 4, (0.550, -0.718, -1.228)],
        ["OG1", 4, (0.472, 1.353, 0.000)],
    ],
    "TRP": [
        ["N", 0, (-0.521, 1.363, 0.000)],
        ["CA", 0, (0.000, 0.000, 0.000)],
        ["C", 0, (1.525, -0.000, 0.000)],
        ["CB", 0, (-0.523, -0.776, -1.212)],
        ["O", 3, (0.627, 1.062, 0.000)],
        ["CG", 4, (0.609, 1.370, -0.000)],
        ["CD1", 5, (0.824, 1.091, 0.000)],
        ["CD2", 5, (0.854, -1.148, -0.005)],
        ["CE2", 5, (2.186, -0.678, -0.007)],
        ["CE3", 5, (0.622, -2.530, -0.007)],
        ["NE1", 5, (2.140, 0.690, -0.004)],
        ["CH2", 5, (3.028, -2.890, -0.013)],
        ["CZ2", 5, (3.283, -1.543, -0.011)],
        ["CZ3", 5, (1.715, -3.389, -0.011)],
    ],
    "TYR": [
        ["N", 0, (-0.522, 1.362, 0.000)],
        ["CA", 0, (0.000, 0.000, 0.000)],
        ["C", 0, (1.524, -0.000, -0.000)],
        ["CB", 0, (-0.522, -0.776, -1.213)],
        ["O", 3, (0.627, 1.062, -0.000)],
        ["CG", 4, (0.607, 1.382, -0.000)],
        ["CD1", 5, (0.716, 1.195, -0.000)],
        ["CD2", 5, (0.713, -1.194, -0.001)],
        ["CE1", 5, (2.107, 1.200, -0.002)],
        ["CE2", 5, (2.104, -1.201, -0.003)],
        ["OH", 5, (4.168, -0.002, -0.005)],
        ["CZ", 5, (2.791, -0.001, -0.003)],
    ],
    "VAL": [
        ["N", 0, (-0.494, 1.373, -0.000)],
        ["CA", 0, (0.000, 0.000, 0.000)],
        ["C", 0, (1.527, -0.000, -0.000)],
        ["CB", 0, (-0.533, -0.795, -1.213)],
        ["O", 3, (0.627, 1.062, -0.000)],
        ["CG1", 4, (0.540, 1.429, -0.000)],
        ["CG2", 4, (0.533, -0.776, 1.203)],
    ],
}

# A list of atoms (excluding hydrogen) for each AA type. PDB naming convention.
residue_atoms = {
    "ALA": ["C", "CA", "CB", "N", "O"],
    "ARG": ["C", "CA", "CB", "CG", "CD", "CZ", "N", "NE", "O", "NH1", "NH2"],
    "ASP": ["C", "CA", "CB", "CG", "N", "O", "OD1", "OD2"],
    "ASN": ["C", "CA", "CB", "CG", "N", "ND2", "O", "OD1"],
    "CYS": ["C", "CA", "CB", "N", "O", "SG"],
    "GLU": ["C", "CA", "CB", "CG", "CD", "N", "O", "OE1", "OE2"],
    "GLN": ["C", "CA", "CB", "CG", "CD", "N", "NE2", "O", "OE1"],
    "GLY": ["C", "CA", "N", "O"],
    "HIS": ["C", "CA", "CB", "CG", "CD2", "CE1", "N", "ND1", "NE2", "O"],
    "ILE": ["C", "CA", "CB", "CG1", "CG2", "CD1", "N", "O"],
    "LEU": ["C", "CA", "CB", "CG", "CD1", "CD2", "N", "O"],
    "LYS": ["C", "CA", "CB", "CG", "CD", "CE", "N", "NZ", "O"],
    "MET": ["C", "CA", "CB", "CG", "CE", "N", "O", "SD"],
    "PHE": ["C", "CA", "CB", "CG", "CD1", "CD2", "CE1", "CE2", "CZ", "N", "O"],
    "PRO": ["C", "CA", "CB", "CG", "CD", "N", "O"],
    "SER": ["C", "CA", "CB", "N", "O", "OG"],
    "THR": ["C", "CA", "CB", "CG2", "N", "O", "OG1"],
    "TRP": [
        "C",
        "CA",
        "CB",
        "CG",
        "CD1",
        "CD2",
        "CE2",
        "CE3",
        "CZ2",
        "CZ3",
        "CH2",
        "N",
        "NE1",
        "O",
    ],
    "TYR": [
        "C",
        "CA",
        "CB",
        "CG",
        "CD1",
        "CD2",
        "CE1",
        "CE2",
        "CZ",
        "N",
        "O",
        "OH",
    ],
    "VAL": ["C", "CA", "CB", "CG1", "CG2", "N", "O"],
}

# Naming swaps for ambiguous atom names.
# Due to symmetries in the amino acids the naming of atoms is ambiguous in
# 4 of the 20 amino acids.
# (The LDDT paper lists 7 amino acids as ambiguous, but the naming ambiguities
# in LEU, VAL and ARG can be resolved by using the 3d constellations of
# the 'ambiguous' atoms and their neighbours)
# TODO: ^ interpret this
residue_atom_renaming_swaps = {
    "ASP": {"OD1": "OD2"},
    "GLU": {"OE1": "OE2"},
    "PHE": {"CD1": "CD2", "CE1": "CE2"},
    "TYR": {"CD1": "CD2", "CE1": "CE2"},
}

# Van der Waals radii [Angstroem] of the atoms (from Wikipedia)
van_der_waals_radius = {
    "C": 1.7,
    "N": 1.55,
    "O": 1.52,
    "S": 1.8,
}

Bond = collections.namedtuple(
    "Bond", ["atom1_name", "atom2_name", "length", "stddev"]
)
BondAngle = collections.namedtuple(
    "BondAngle",
    ["atom1_name", "atom2_name", "atom3name", "angle_rad", "stddev"],
)


def _make_bond_key(atom1_name, atom2_name):
    """Unique, order-independent key to lookup bonds."""
    return "-".join(sorted([atom1_name, atom2_name]))


def parse_stereo_chemical_props(
    stereo_chemical_props: str,
) -> Tuple[
    Mapping[str, List[Bond]],
    Mapping[str, List[Bond]],
    Mapping[str, List[BondAngle]],
]:
    """Parse the text of stereo_chemical_props.txt into a nice structure.

    The text is read as two consecutive tables. The first starts with a header
    line followed by rows of the form `A-B RESNAME length stddev` and ends at a
    line containing only "-". After one skipped line and a second header line,
    the second table holds rows of the form `A-B-C RESNAME angle stddev` (in
    degrees) and also ends at a "-" line. Anything after that is ignored.

    Bond angles are additionally translated into the length of the opposite
    edge of the triangle ("residue_virtual_bonds").

    Args:
      stereo_chemical_props: The full text of the properties file.

    Returns:
      residue_bonds: dict that maps resname --> list of Bond tuples
      residue_virtual_bonds: dict that maps resname --> list of Bond tuples
      residue_bond_angles: dict that maps resname --> list of BondAngle tuples

    Raises:
      KeyError: If an angle refers to a bond that is not listed for the same
        residue.
    """
    lines_iter = iter(stereo_chemical_props.splitlines())

    # Load bond lengths.
    residue_bonds = {}
    next(lines_iter)  # Skip header line.
    for line in lines_iter:
        if line.strip() == "-":
            break
        bond, resname, length, stddev = line.split()
        atom1, atom2 = bond.split("-")
        residue_bonds.setdefault(resname, []).append(
            Bond(atom1, atom2, float(length), float(stddev))
        )
    residue_bonds["UNK"] = []

    # Load bond angles.
    residue_bond_angles = {}
    next(lines_iter)  # Skip empty line.
    next(lines_iter)  # Skip header line.
    for line in lines_iter:
        if line.strip() == "-":
            break
        bond, resname, angle_degree, stddev_degree = line.split()
        atom1, atom2, atom3 = bond.split("-")
        residue_bond_angles.setdefault(resname, []).append(
            BondAngle(
                atom1,
                atom2,
                atom3,
                float(angle_degree) / 180.0 * np.pi,
                float(stddev_degree) / 180.0 * np.pi,
            )
        )
    residue_bond_angles["UNK"] = []

    # Translate bond angles into distances ("virtual bonds").
    residue_virtual_bonds = {}
    for resname, bond_angles in residue_bond_angles.items():
        # Create a fast lookup dict for bond lengths.
        bond_cache = {
            _make_bond_key(b.atom1_name, b.atom2_name): b
            for b in residue_bonds[resname]
        }
        residue_virtual_bonds[resname] = []
        for ba in bond_angles:
            bond1 = bond_cache[_make_bond_key(ba.atom1_name, ba.atom2_name)]
            bond2 = bond_cache[_make_bond_key(ba.atom2_name, ba.atom3name)]

            # Compute distance between atom1 and atom3 using the law of cosines
            # c^2 = a^2 + b^2 - 2ab*cos(gamma).
            gamma = ba.angle_rad
            length = np.sqrt(
                bond1.length ** 2
                + bond2.length ** 2
                - 2 * bond1.length * bond2.length * np.cos(gamma)
            )

            # Propagation of uncertainty assuming uncorrelated errors.
            dl_outer = 0.5 / length
            dl_dgamma = (
                2 * bond1.length * bond2.length * np.sin(gamma)
            ) * dl_outer
            dl_db1 = (
                2 * bond1.length - 2 * bond2.length * np.cos(gamma)
            ) * dl_outer
            dl_db2 = (
                2 * bond2.length - 2 * bond1.length * np.cos(gamma)
            ) * dl_outer
            stddev = np.sqrt(
                (dl_dgamma * ba.stddev) ** 2
                + (dl_db1 * bond1.stddev) ** 2
                + (dl_db2 * bond2.stddev) ** 2
            )
            residue_virtual_bonds[resname].append(
                Bond(ba.atom1_name, ba.atom3name, length, stddev)
            )

    return (residue_bonds, residue_virtual_bonds, residue_bond_angles)


@functools.lru_cache(maxsize=None)
def load_stereo_chemical_props() -> Tuple[
    Mapping[str, List[Bond]],
    Mapping[str, List[Bond]],
    Mapping[str, List[BondAngle]],
]:
    """Load stereo_chemical_props.txt into a nice structure.

    Load literature values for bond lengths and bond angles and translate
    bond angles into the length of the opposite edge of the triangle
    ("residue_virtual_bonds").

    The result is cached, so every caller receives the same dict objects and
    must not mutate them.

    Returns:
      residue_bonds: dict that maps resname --> list of Bond tuples
      residue_virtual_bonds: dict that maps resname --> list of Bond tuples
      residue_bond_angles: dict that maps resname --> list of BondAngle tuples
    """
    # TODO: this file should be downloaded in a setup script
    stereo_chemical_props = resources.read_text(
        "openfold.resources", "stereo_chemical_props.txt"
    )
    return parse_stereo_chemical_props(stereo_chemical_props)


# Between-residue bond lengths for general bonds (first element) and for Proline
# (second element).
between_res_bond_length_c_n = [1.329, 1.341]
between_res_bond_length_stddev_c_n = [0.014, 0.016]

# Between-residue cos_angles.
between_res_cos_angles_c_n_ca = [-0.5203, 0.0353]  # degrees: 121.352 +- 2.315
between_res_cos_angles_ca_c_n = [-0.4473, 0.0311]  # degrees: 116.568 +- 1.995

# This mapping is used when we need to store atom data in a format that requires
# fixed atom data size for every residue (e.g. a numpy array).
+atom_types = [ + "N", + "CA", + "C", + "CB", + "O", + "CG", + "CG1", + "CG2", + "OG", + "OG1", + "SG", + "CD", + "CD1", + "CD2", + "ND1", + "ND2", + "OD1", + "OD2", + "SD", + "CE", + "CE1", + "CE2", + "CE3", + "NE", + "NE1", + "NE2", + "OE1", + "OE2", + "CH2", + "NH1", + "NH2", + "OH", + "CZ", + "CZ2", + "CZ3", + "NZ", + "OXT", +] +atom_order = {atom_type: i for i, atom_type in enumerate(atom_types)} +atom_type_num = len(atom_types) # := 37. + +# A compact atom encoding with 14 columns +# pylint: disable=line-too-long +# pylint: disable=bad-whitespace +restype_name_to_atom14_names = { + "ALA": ["N", "CA", "C", "O", "CB", "", "", "", "", "", "", "", "", ""], + "ARG": [ + "N", + "CA", + "C", + "O", + "CB", + "CG", + "CD", + "NE", + "CZ", + "NH1", + "NH2", + "", + "", + "", + ], + "ASN": [ + "N", + "CA", + "C", + "O", + "CB", + "CG", + "OD1", + "ND2", + "", + "", + "", + "", + "", + "", + ], + "ASP": [ + "N", + "CA", + "C", + "O", + "CB", + "CG", + "OD1", + "OD2", + "", + "", + "", + "", + "", + "", + ], + "CYS": ["N", "CA", "C", "O", "CB", "SG", "", "", "", "", "", "", "", ""], + "GLN": [ + "N", + "CA", + "C", + "O", + "CB", + "CG", + "CD", + "OE1", + "NE2", + "", + "", + "", + "", + "", + ], + "GLU": [ + "N", + "CA", + "C", + "O", + "CB", + "CG", + "CD", + "OE1", + "OE2", + "", + "", + "", + "", + "", + ], + "GLY": ["N", "CA", "C", "O", "", "", "", "", "", "", "", "", "", ""], + "HIS": [ + "N", + "CA", + "C", + "O", + "CB", + "CG", + "ND1", + "CD2", + "CE1", + "NE2", + "", + "", + "", + "", + ], + "ILE": [ + "N", + "CA", + "C", + "O", + "CB", + "CG1", + "CG2", + "CD1", + "", + "", + "", + "", + "", + "", + ], + "LEU": [ + "N", + "CA", + "C", + "O", + "CB", + "CG", + "CD1", + "CD2", + "", + "", + "", + "", + "", + "", + ], + "LYS": [ + "N", + "CA", + "C", + "O", + "CB", + "CG", + "CD", + "CE", + "NZ", + "", + "", + "", + "", + "", + ], + "MET": [ + "N", + "CA", + "C", + "O", + "CB", + "CG", + "SD", + "CE", + "", + "", + "", + "", + "", + "", + ], + "PHE": [ + "N", + 
"CA", + "C", + "O", + "CB", + "CG", + "CD1", + "CD2", + "CE1", + "CE2", + "CZ", + "", + "", + "", + ], + "PRO": ["N", "CA", "C", "O", "CB", "CG", "CD", "", "", "", "", "", "", ""], + "SER": ["N", "CA", "C", "O", "CB", "OG", "", "", "", "", "", "", "", ""], + "THR": [ + "N", + "CA", + "C", + "O", + "CB", + "OG1", + "CG2", + "", + "", + "", + "", + "", + "", + "", + ], + "TRP": [ + "N", + "CA", + "C", + "O", + "CB", + "CG", + "CD1", + "CD2", + "NE1", + "CE2", + "CE3", + "CZ2", + "CZ3", + "CH2", + ], + "TYR": [ + "N", + "CA", + "C", + "O", + "CB", + "CG", + "CD1", + "CD2", + "CE1", + "CE2", + "CZ", + "OH", + "", + "", + ], + "VAL": [ + "N", + "CA", + "C", + "O", + "CB", + "CG1", + "CG2", + "", + "", + "", + "", + "", + "", + "", + ], + "UNK": ["", "", "", "", "", "", "", "", "", "", "", "", "", ""], +} +# pylint: enable=line-too-long +# pylint: enable=bad-whitespace + + +# This is the standard residue order when coding AA type as a number. +# Reproduce it by taking 3-letter AA codes and sorting them alphabetically. +restypes = [ + "A", + "R", + "N", + "D", + "C", + "Q", + "E", + "G", + "H", + "I", + "L", + "K", + "M", + "F", + "P", + "S", + "T", + "W", + "Y", + "V", +] +restype_order = {restype: i for i, restype in enumerate(restypes)} +restype_num = len(restypes) # := 20. +unk_restype_index = restype_num # Catch-all index for unknown restypes. + +restypes_with_x = restypes + ["X"] +restype_order_with_x = {restype: i for i, restype in enumerate(restypes_with_x)} + + +def sequence_to_onehot( + sequence: str, mapping: Mapping[str, int], map_unknown_to_x: bool = False +) -> np.ndarray: + """Maps the given sequence into a one-hot encoded matrix. + + Args: + sequence: An amino acid sequence. + mapping: A dictionary mapping amino acids to integers. + map_unknown_to_x: If True, any amino acid that is not in the mapping will be + mapped to the unknown amino acid 'X'. If the mapping doesn't contain + amino acid 'X', an error will be thrown. 
If False, any amino acid not in + the mapping will throw an error. + + Returns: + A numpy array of shape (seq_len, num_unique_aas) with one-hot encoding of + the sequence. + + Raises: + ValueError: If the mapping doesn't contain values from 0 to + num_unique_aas - 1 without any gaps. + """ + num_entries = max(mapping.values()) + 1 + + if sorted(set(mapping.values())) != list(range(num_entries)): + raise ValueError( + "The mapping must have values from 0 to num_unique_aas-1 " + "without any gaps. Got: %s" % sorted(mapping.values()) + ) + + one_hot_arr = np.zeros((len(sequence), num_entries), dtype=int) + + for aa_index, aa_type in enumerate(sequence): + if map_unknown_to_x: + if aa_type.isalpha() and aa_type.isupper(): + aa_id = mapping.get(aa_type, mapping["X"]) + else: + raise ValueError( + f"Invalid character in the sequence: {aa_type}" + ) + else: + aa_id = mapping[aa_type] + one_hot_arr[aa_index, aa_id] = 1 + + return one_hot_arr + + +restype_1to3 = { + "A": "ALA", + "R": "ARG", + "N": "ASN", + "D": "ASP", + "C": "CYS", + "Q": "GLN", + "E": "GLU", + "G": "GLY", + "H": "HIS", + "I": "ILE", + "L": "LEU", + "K": "LYS", + "M": "MET", + "F": "PHE", + "P": "PRO", + "S": "SER", + "T": "THR", + "W": "TRP", + "Y": "TYR", + "V": "VAL", +} + + +# NB: restype_3to1 differs from Bio.PDB.protein_letters_3to1 by being a simple +# 1-to-1 mapping of 3 letter names to one letter names. The latter contains +# many more, and less common, three letter names as keys and maps many of these +# to the same one letter name (including 'X' and 'U' which we don't use here). +restype_3to1 = {v: k for k, v in restype_1to3.items()} + +# Define a restype name for all unknown residues. +unk_restype = "UNK" + +resnames = [restype_1to3[r] for r in restypes] + [unk_restype] +resname_to_idx = {resname: i for i, resname in enumerate(resnames)} + + +# The mapping here uses hhblits convention, so that B is mapped to D, J and O +# are mapped to X, U is mapped to C, and Z is mapped to E. 
def _make_standard_atom_mask() -> np.ndarray:
    """Build the [restype_num + 1, atom_type_num] atom-presence mask.

    Row i corresponds to ``restypes[i]``; a 1 marks every atom type listed for
    that residue in ``residue_atoms``. The extra final row (the unknown
    residue type) is left as all zeros.
    """
    standard_mask = np.zeros([restype_num + 1, atom_type_num], dtype=int)
    for row, one_letter in enumerate(restypes):
        three_letter = restype_1to3[one_letter]
        # Translate the residue's atom names into column indices and set them
        # all at once.
        columns = [atom_order[name] for name in residue_atoms[three_letter]]
        standard_mask[row, columns] = 1
    return standard_mask
+def chi_angle_atom(atom_index: int) -> np.ndarray: + """Define chi-angle rigid groups via one-hot representations.""" + chi_angles_index = {} + one_hots = [] + + for k, v in chi_angles_atoms.items(): + indices = [atom_types.index(s[atom_index]) for s in v] + indices.extend([-1] * (4 - len(indices))) + chi_angles_index[k] = indices + + for r in restypes: + res3 = restype_1to3[r] + one_hot = np.eye(atom_type_num)[chi_angles_index[res3]] + one_hots.append(one_hot) + + one_hots.append(np.zeros([4, atom_type_num])) # Add zeros for residue `X`. + one_hot = np.stack(one_hots, axis=0) + one_hot = np.transpose(one_hot, [0, 2, 1]) + + return one_hot + + +chi_atom_1_one_hot = chi_angle_atom(1) +chi_atom_2_one_hot = chi_angle_atom(2) + +# An array like chi_angles_atoms but using indices rather than names. +chi_angles_atom_indices = [chi_angles_atoms[restype_1to3[r]] for r in restypes] +chi_angles_atom_indices = tree.map_structure( + lambda atom_name: atom_order[atom_name], chi_angles_atom_indices +) +chi_angles_atom_indices = np.array( + [ + chi_atoms + ([[0, 0, 0, 0]] * (4 - len(chi_atoms))) + for chi_atoms in chi_angles_atom_indices + ] +) + +# Mapping from (res_name, atom_name) pairs to the atom's chi group index +# and atom index within that group. +chi_groups_for_atom = collections.defaultdict(list) +for res_name, chi_angle_atoms_for_res in chi_angles_atoms.items(): + for chi_group_i, chi_group in enumerate(chi_angle_atoms_for_res): + for atom_i, atom in enumerate(chi_group): + chi_groups_for_atom[(res_name, atom)].append((chi_group_i, atom_i)) +chi_groups_for_atom = dict(chi_groups_for_atom) + + +def _make_rigid_transformation_4x4(ex, ey, translation): + """Create a rigid 4x4 transformation matrix from two axes and transl.""" + # Normalize ex. 
+ ex_normalized = ex / np.linalg.norm(ex) + + # make ey perpendicular to ex + ey_normalized = ey - np.dot(ey, ex_normalized) * ex_normalized + ey_normalized /= np.linalg.norm(ey_normalized) + + # compute ez as cross product + eznorm = np.cross(ex_normalized, ey_normalized) + m = np.stack( + [ex_normalized, ey_normalized, eznorm, translation] + ).transpose() + m = np.concatenate([m, [[0.0, 0.0, 0.0, 1.0]]], axis=0) + return m + + +# create an array with (restype, atomtype) --> rigid_group_idx +# and an array with (restype, atomtype, coord) for the atom positions +# and compute affine transformation matrices (4,4) from one rigid group to the +# previous group +restype_atom37_to_rigid_group = np.zeros([21, 37], dtype=int) +restype_atom37_mask = np.zeros([21, 37], dtype=np.float32) +restype_atom37_rigid_group_positions = np.zeros([21, 37, 3], dtype=np.float32) +restype_atom14_to_rigid_group = np.zeros([21, 14], dtype=int) +restype_atom14_mask = np.zeros([21, 14], dtype=np.float32) +restype_atom14_rigid_group_positions = np.zeros([21, 14, 3], dtype=np.float32) +restype_rigid_group_default_frame = np.zeros([21, 8, 4, 4], dtype=np.float32) + + +def _make_rigid_group_constants(): + """Fill the arrays above.""" + for restype, restype_letter in enumerate(restypes): + resname = restype_1to3[restype_letter] + for atomname, group_idx, atom_position in rigid_group_atom_positions[ + resname + ]: + atomtype = atom_order[atomname] + restype_atom37_to_rigid_group[restype, atomtype] = group_idx + restype_atom37_mask[restype, atomtype] = 1 + restype_atom37_rigid_group_positions[ + restype, atomtype, : + ] = atom_position + + atom14idx = restype_name_to_atom14_names[resname].index(atomname) + restype_atom14_to_rigid_group[restype, atom14idx] = group_idx + restype_atom14_mask[restype, atom14idx] = 1 + restype_atom14_rigid_group_positions[ + restype, atom14idx, : + ] = atom_position + + for restype, restype_letter in enumerate(restypes): + resname = restype_1to3[restype_letter] + 
def make_atom14_dists_bounds(
    overlap_tolerance=1.5, bond_length_tolerance_factor=15
):
    """Build per-residue-type atom14 distance bounds for violation checks.

    For every pair of distinct, non-empty atom14 slots of a residue type the
    lower bound is first set to the sum of the two van der Waals radii (looked
    up by the first character of each atom name) minus ``overlap_tolerance``,
    and the upper bound to 1e10. Pairs that appear in the bond or virtual-bond
    tables returned by ``load_stereo_chemical_props`` are then overwritten with
    ``length -/+ bond_length_tolerance_factor * stddev``, and their stddev is
    recorded.

    Returns:
        A dict with float32 arrays "lower_bound", "upper_bound" and "stddev",
        each of shape (21, 14, 14).
    """
    lower_table = np.zeros([21, 14, 14], np.float32)
    upper_table = np.zeros([21, 14, 14], np.float32)
    stddev_table = np.zeros([21, 14, 14], np.float32)

    def fill_pair(table, res_idx, i, j, value):
        # Write both (i, j) and (j, i) so the table stays symmetric.
        table[res_idx, i, j] = value
        table[res_idx, j, i] = value

    residue_bonds, residue_virtual_bonds, _ = load_stereo_chemical_props()
    for res_idx, one_letter in enumerate(restypes):
        resname = restype_1to3[one_letter]
        slot_names = restype_name_to_atom14_names[resname]

        # Clash bounds between every pair of atoms present in this residue.
        for i, name_i in enumerate(slot_names):
            if not name_i:
                continue
            radius_i = van_der_waals_radius[name_i[0]]
            for j, name_j in enumerate(slot_names):
                if i == j or not name_j:
                    continue
                radius_j = van_der_waals_radius[name_j[0]]
                clash_lower = radius_i + radius_j - overlap_tolerance
                fill_pair(lower_table, res_idx, i, j, clash_lower)
                fill_pair(upper_table, res_idx, i, j, 1e10)

        # Bonded and angle-derived pairs replace the clash bounds.
        for bond in residue_bonds[resname] + residue_virtual_bonds[resname]:
            i = slot_names.index(bond.atom1_name)
            j = slot_names.index(bond.atom2_name)
            fill_pair(
                lower_table, res_idx, i, j,
                bond.length - bond_length_tolerance_factor * bond.stddev,
            )
            fill_pair(
                upper_table, res_idx, i, j,
                bond.length + bond_length_tolerance_factor * bond.stddev,
            )
            fill_pair(stddev_table, res_idx, i, j, bond.stddev)

    return {
        "lower_bound": lower_table,  # shape (21,14,14)
        "upper_bound": upper_table,  # shape (21,14,14)
        "stddev": stddev_table,  # shape (21,14,14)
    }
def remove_arguments(parser, args):
    """Strip the given option strings from an argparse parser.

    Args:
        parser: The ``argparse.ArgumentParser`` to modify in place.
        args: Iterable of option strings (e.g. ``"--foo"``) to remove. Option
            strings the parser does not define are ignored.

    Each matching option string is removed through the parser's own
    conflict-resolution routine. That routine drops the whole action once it
    has no option strings left.
    """
    for arg in args:
        # Iterate over a snapshot: resolving a conflict can delete the action
        # from parser._actions, and mutating a list while looping over it
        # makes the loop skip entries.
        for action in list(parser._actions):
            if arg in action.option_strings:
                parser._handle_conflict_resolve(None, [(arg, action)])
def get_checkpoint_fn():
    """Return the activation-checkpointing function to use.

    DeepSpeed's checkpoint function is chosen when
    ``deepspeed.checkpointing.is_configured()`` reports true; otherwise the
    stock ``torch.utils.checkpoint.checkpoint`` is returned.
    """
    if deepspeed.checkpointing.is_configured():
        return deepspeed.checkpointing.checkpoint
    return torch.utils.checkpoint.checkpoint
class ExponentialMovingAverage:
    """
    Tracks exponentially decayed running averages of a module's state.

    Every call to `update` moves each stored tensor `copy` towards the
    matching tensor `param` of the supplied module:

        `copy = decay * copy + (1 - decay) * param`

    with `decay` taken from this object's `decay` attribute.
    """

    def __init__(self, model: nn.Module, decay: float):
        """
        Args:
            model:
                The torch.nn.Module whose state dict is snapshotted and
                tracked
            decay:
                Weight (usually close to 1.) given to the stored value in
                the update formula above
        """
        super().__init__()

        def detached_copy(t):
            return t.clone().detach()

        self.params = tensor_tree_map(detached_copy, model.state_dict())
        self.decay = decay
        self.device = next(model.parameters()).device

    def to(self, device):
        """Move every stored tensor to `device` and remember the device."""
        self.params = tensor_tree_map(lambda t: t.to(device), self.params)
        self.device = device

    def _update_state_dict_(self, update, state_dict):
        """Blend `update` into `state_dict` in place, recursing into nests."""
        with torch.no_grad():
            for name, incoming in update.items():
                tracked = state_dict[name]
                if isinstance(incoming, torch.Tensor):
                    # tracked -= (1 - decay) * (tracked - incoming), which is
                    # the formula from the class docstring applied in place.
                    delta = tracked - incoming
                    delta *= 1 - self.decay
                    tracked -= delta
                else:
                    self._update_state_dict_(incoming, tracked)

    def update(self, model: torch.nn.Module) -> None:
        """
        Folds the state dict of `model` into the stored averages. `model`
        should be structured like the module this object was built from.
        """
        self._update_state_dict_(model.state_dict(), self.params)

    def load_state_dict(self, state_dict: OrderedDict) -> None:
        """Restore `params` and `decay` from a dict made by `state_dict`."""
        self.params = state_dict["params"]
        self.decay = state_dict["decay"]

    def state_dict(self) -> OrderedDict:
        """Return the stored averages and the decay as an OrderedDict."""
        snapshot = OrderedDict()
        snapshot["params"] = self.params
        snapshot["decay"] = self.decay
        return snapshot
def build_template_angle_feat(template_feats):
    """Concatenate the per-residue template angle features.

    Reads "template_aatype", "template_torsion_angles_sin_cos",
    "template_alt_torsion_angles_sin_cos" and "template_torsion_angles_mask"
    from ``template_feats``. The residue types are one-hot encoded into 22
    classes, the last two dimensions of each sin/cos tensor are flattened
    into 14 values, and everything is joined along the last dimension
    together with the torsion mask.
    """
    aatype = template_feats["template_aatype"]
    sin_cos = template_feats["template_torsion_angles_sin_cos"]
    alt_sin_cos = template_feats["template_alt_torsion_angles_sin_cos"]
    angle_mask = template_feats["template_torsion_angles_mask"]

    def merge_last_two(t):
        # Collapse the trailing two dimensions into one of length 14.
        return t.reshape(*t.shape[:-2], 14)

    pieces = [
        nn.functional.one_hot(aatype, 22),
        merge_last_two(sin_cos),
        merge_last_two(alt_sin_cos),
        angle_mask,
    ]
    return torch.cat(pieces, dim=-1)
def build_extra_msa_feat(batch):
    """Assemble the extra-MSA feature tensor.

    One-hot encodes ``batch["extra_msa"]`` into 23 classes and appends
    ``batch["extra_has_deletion"]`` and ``batch["extra_deletion_value"]``,
    each given a trailing singleton dimension, along the last axis.
    """
    one_hot_msa = nn.functional.one_hot(batch["extra_msa"], 23)
    has_deletion = batch["extra_has_deletion"].unsqueeze(-1)
    deletion_value = batch["extra_deletion_value"].unsqueeze(-1)
    return torch.cat([one_hot_msa, has_deletion, deletion_value], dim=-1)
def frames_and_literature_positions_to_atom14_pos(
    r: Rigid,
    aatype: torch.Tensor,
    default_frames,
    group_idx,
    atom_mask,
    lit_positions,
):
    """Place the atom14 atoms by applying each atom's rigid-group frame.

    Args:
        r: Rigid frames; indexed as ``r[..., None, :]`` and multiplied by a
            one-hot group mask, so presumably one frame per residue and
            rigid group — TODO confirm against callers.
        aatype: Residue type indices used to index the constant tables.
        default_frames: Only its ``shape[-3]`` is read, as the number of
            rigid groups for the one-hot encoding.
        group_idx: Table giving, per residue type and atom14 slot, the index
            of the rigid group the atom belongs to.
        atom_mask: Table giving, per residue type and atom14 slot, whether
            the atom exists.
        lit_positions: Table of reference atom positions per residue type
            and atom14 slot.

    Returns:
        Atom positions after applying the selected frames, multiplied by the
        atom-existence mask.
    """
    # NOTE: the original also gathered default_frames[aatype, ...] into a
    # local that was never read; that dead lookup has been dropped.

    # [*, N, 14]
    group_mask = group_idx[aatype, ...]

    # [*, N, 14, 8]
    group_mask = nn.functional.one_hot(
        group_mask,
        num_classes=default_frames.shape[-3],
    )

    # [*, N, 14, 8]
    t_atoms_to_global = r[..., None, :] * group_mask

    # [*, N, 14]
    # Summing over the one-hot group axis selects the frame of each atom.
    t_atoms_to_global = t_atoms_to_global.map_tensor_fn(
        lambda x: torch.sum(x, dim=-1)
    )

    # [*, N, 14, 1]
    atom_mask = atom_mask[aatype, ...].unsqueeze(-1)

    # [*, N, 14, 3]
    lit_positions = lit_positions[aatype, ...]
    pred_positions = t_atoms_to_global.apply(lit_positions)
    pred_positions = pred_positions * atom_mask

    return pred_positions
_NPZ_KEY_PREFIX = "alphafold/alphafold_iteration/"

try:  # Python 3.11+
    from enum import member as _enum_member
except ImportError:  # older interpreters: partial objects already become members

    def _enum_member(value):
        """Identity fallback for interpreters without ``enum.member``."""
        return value


# With Param, a poor man's enum with attributes (Rust-style)
class ParamType(Enum):
    """Kinds of parameters, each carrying the reshape needed on import.

    ``member.transformation`` is a callable that maps a source weight
    tensor to the layout expected by the destination parameter.

    The values are ``functools.partial`` objects so that the lambdas are not
    turned into methods. Newer Python versions treat bare ``partial`` objects
    in a class body as descriptors rather than enum members, so each value is
    wrapped in ``enum.member`` when that helper is available.
    """

    LinearWeight = _enum_member(partial(lambda w: w.transpose(-1, -2)))
    LinearWeightMHA = _enum_member(
        partial(lambda w: w.reshape(*w.shape[:-2], -1).transpose(-1, -2))
    )
    LinearMHAOutputWeight = _enum_member(
        partial(
            lambda w: w.reshape(*w.shape[:-3], -1, w.shape[-1]).transpose(
                -1, -2
            )
        )
    )
    LinearBiasMHA = _enum_member(
        partial(lambda w: w.reshape(*w.shape[:-2], -1))
    )
    LinearWeightOPM = _enum_member(
        partial(
            lambda w: w.reshape(*w.shape[:-3], -1, w.shape[-1]).transpose(
                -1, -2
            )
        )
    )
    Other = _enum_member(partial(lambda w: w))

    def __init__(self, fn):
        self.transformation = fn


@dataclass
class Param:
    """A destination parameter together with how to fill it."""

    # A single tensor, or a list of tensors when ``stacked`` is True.
    param: Union[torch.Tensor, List[torch.Tensor]]
    # Which transformation to apply to the source weights.
    param_type: ParamType = ParamType.Other
    # True when ``param`` is a list matching the leading axis of the source.
    stacked: bool = False


def _process_translations_dict(d, top_layer=True):
    """Flatten a nested translation dict into ``{flat_key: leaf}``.

    Keys of nested dicts are joined with ``/``; the outermost keys that hold
    a dict get ``_NPZ_KEY_PREFIX`` prepended, and leaves below the top layer
    get an extra leading ``/``.

    Args:
        d: Nested mapping whose leaves are arbitrary values.
        top_layer: True only for the outermost call.

    Returns:
        A flat ``dict`` from joined key strings to leaves.
    """
    flat = {}
    for k, v in d.items():
        # isinstance also accepts dict subclasses (e.g. OrderedDict), which
        # the former ``type(v) == dict`` check treated as leaves.
        if isinstance(v, dict):
            prefix = _NPZ_KEY_PREFIX if top_layer else ""
            sub_flat = {
                (prefix + "/".join([k, k_prime])): v_prime
                for k_prime, v_prime in _process_translations_dict(
                    v, top_layer=False
                ).items()
            }
            flat.update(sub_flat)
        else:
            k = "/" + k if not top_layer else k
            flat[k] = v

    return flat


def stacked(param_dict_list, out=None):
    """Merge parallel Param dicts into one dict of stacked Params.

    Args:
        param_dict_list:
            A list of (nested) Param dicts to stack. The structure of
            each dict must be identical (down to the ParamTypes of
            "parallel" Params). There must be at least one dict
            in the list.
        out:
            Optional dict to fill in place; a new one is created when
            omitted.

    Returns:
        ``out``, where every Param leaf holds the list of the corresponding
        tensors from all input dicts and has ``stacked=True``. Values that
        are neither dicts nor Params are skipped.
    """
    if out is None:
        out = {}
    template = param_dict_list[0]
    for k in template:
        v = [d[k] for d in param_dict_list]
        if isinstance(v[0], dict):
            out[k] = {}
            stacked(v, out=out[k])
        elif isinstance(v[0], Param):
            out[k] = Param(
                param=[param.param for param in v],
                param_type=v[0].param_type,
                stacked=True,
            )

    return out
def assign(translation_dict, orig_weights):
    """Copy source weights into the parameters named by ``translation_dict``.

    Args:
        translation_dict: Flat mapping from source key to ``Param``.
        orig_weights: Mapping from the same keys to array-likes.

    Raises:
        ValueError: If a stacked ``Param`` lists a different number of
            tensors than the leading axis of the source weight provides.
            Previously ``zip`` silently truncated in that case.
        Exception: Any error raised while transforming or copying is
            re-raised after printing the offending key and shapes.
    """
    for k, param in translation_dict.items():
        with torch.no_grad():
            weights = torch.as_tensor(orig_weights[k])
            ref, param_type = param.param, param.param_type
            if param.stacked:
                weights = torch.unbind(weights, 0)
            else:
                weights = [weights]
                ref = [ref]

            if len(ref) != len(weights):
                raise ValueError(
                    f"{k}: expected {len(ref)} stacked tensors, "
                    f"found {len(weights)}"
                )

            try:
                weights = list(map(param_type.transformation, weights))
                for p, w in zip(ref, weights):
                    p.copy_(w)
            except Exception:
                # Debugging aid: identify which entry failed, then re-raise.
                print(k)
                print(ref[0].shape)
                print(weights[0].shape)
                raise


def import_jax_weights_(model, npz_path, version="model_1"):
    """Load AlphaFold JAX parameters from an ``.npz`` file into ``model``.

    Builds a nested translation dict from the modules of ``model``, flattens
    it to the checkpoint key layout, checks that every expected key is
    present in the archive and copies the weights in place.

    Args:
        model: Model whose submodules are read by attribute name below.
        npz_path: Path of the ``.npz`` parameter archive.
        version: Preset name. Template entries are dropped for the presets
            listed in ``no_templ`` and a predicted-aligned-error head is
            added when the name contains ``"_ptm"``.

    Raises:
        AssertionError: If the model expects keys the archive lacks.
    """
    #######################
    # Some templates
    #######################

    LinearWeight = lambda l: (Param(l, param_type=ParamType.LinearWeight))

    LinearBias = lambda l: (Param(l))

    LinearWeightMHA = lambda l: (Param(l, param_type=ParamType.LinearWeightMHA))

    LinearBiasMHA = lambda b: (Param(b, param_type=ParamType.LinearBiasMHA))

    LinearWeightOPM = lambda l: (Param(l, param_type=ParamType.LinearWeightOPM))

    LinearParams = lambda l: {
        "weights": LinearWeight(l.weight),
        "bias": LinearBias(l.bias),
    }

    LayerNormParams = lambda l: {
        "scale": Param(l.weight),
        "offset": Param(l.bias),
    }

    AttentionParams = lambda att: {
        "query_w": LinearWeightMHA(att.linear_q.weight),
        "key_w": LinearWeightMHA(att.linear_k.weight),
        "value_w": LinearWeightMHA(att.linear_v.weight),
        "output_w": Param(
            att.linear_o.weight,
            param_type=ParamType.LinearMHAOutputWeight,
        ),
        "output_b": LinearBias(att.linear_o.bias),
    }

    AttentionGatedParams = lambda att: dict(
        **AttentionParams(att),
        **{
            "gating_w": LinearWeightMHA(att.linear_g.weight),
            "gating_b": LinearBiasMHA(att.linear_g.bias),
        },
    )

    # Same as the gated variant, but key/value use the plain linear layout.
    GlobalAttentionParams = lambda att: dict(
        AttentionGatedParams(att),
        key_w=LinearWeight(att.linear_k.weight),
        value_w=LinearWeight(att.linear_v.weight),
    )

    TriAttParams = lambda tri_att: {
        "query_norm": LayerNormParams(tri_att.layer_norm),
        "feat_2d_weights": LinearWeight(tri_att.linear.weight),
        "attention": AttentionGatedParams(tri_att.mha),
    }

    TriMulOutParams = lambda tri_mul: {
        "layer_norm_input": LayerNormParams(tri_mul.layer_norm_in),
        "left_projection": LinearParams(tri_mul.linear_a_p),
        "right_projection": LinearParams(tri_mul.linear_b_p),
        "left_gate": LinearParams(tri_mul.linear_a_g),
        "right_gate": LinearParams(tri_mul.linear_b_g),
        "center_layer_norm": LayerNormParams(tri_mul.layer_norm_out),
        "output_projection": LinearParams(tri_mul.linear_z),
        "gating_linear": LinearParams(tri_mul.linear_g),
    }

    # see commit b88f8da on the Alphafold repo
    # Alphafold swaps the pseudocode's a and b between the incoming/outcoming
    # iterations of triangle multiplication, which is confusing and not
    # reproduced in our implementation.
    TriMulInParams = lambda tri_mul: {
        "layer_norm_input": LayerNormParams(tri_mul.layer_norm_in),
        "left_projection": LinearParams(tri_mul.linear_b_p),
        "right_projection": LinearParams(tri_mul.linear_a_p),
        "left_gate": LinearParams(tri_mul.linear_b_g),
        "right_gate": LinearParams(tri_mul.linear_a_g),
        "center_layer_norm": LayerNormParams(tri_mul.layer_norm_out),
        "output_projection": LinearParams(tri_mul.linear_z),
        "gating_linear": LinearParams(tri_mul.linear_g),
    }

    PairTransitionParams = lambda pt: {
        "input_layer_norm": LayerNormParams(pt.layer_norm),
        "transition1": LinearParams(pt.linear_1),
        "transition2": LinearParams(pt.linear_2),
    }

    MSAAttParams = lambda matt: {
        "query_norm": LayerNormParams(matt.layer_norm_m),
        "attention": AttentionGatedParams(matt.mha),
    }

    MSAColAttParams = lambda matt: {
        "query_norm": LayerNormParams(matt._msa_att.layer_norm_m),
        "attention": AttentionGatedParams(matt._msa_att.mha),
    }

    MSAGlobalAttParams = lambda matt: {
        "query_norm": LayerNormParams(matt.layer_norm_m),
        "attention": GlobalAttentionParams(matt.global_attention),
    }

    MSAAttPairBiasParams = lambda matt: dict(
        **MSAAttParams(matt),
        **{
            "feat_2d_norm": LayerNormParams(matt.layer_norm_z),
            "feat_2d_weights": LinearWeight(matt.linear_z.weight),
        },
    )

    IPAParams = lambda ipa: {
        "q_scalar": LinearParams(ipa.linear_q),
        "kv_scalar": LinearParams(ipa.linear_kv),
        "q_point_local": LinearParams(ipa.linear_q_points),
        "kv_point_local": LinearParams(ipa.linear_kv_points),
        "trainable_point_weights": Param(
            param=ipa.head_weights, param_type=ParamType.Other
        ),
        "attention_2d": LinearParams(ipa.linear_b),
        "output_projection": LinearParams(ipa.linear_out),
    }

    TemplatePairBlockParams = lambda b: {
        "triangle_attention_starting_node": TriAttParams(b.tri_att_start),
        "triangle_attention_ending_node": TriAttParams(b.tri_att_end),
        "triangle_multiplication_outgoing": TriMulOutParams(b.tri_mul_out),
        "triangle_multiplication_incoming": TriMulInParams(b.tri_mul_in),
        "pair_transition": PairTransitionParams(b.pair_transition),
    }

    MSATransitionParams = lambda m: {
        "input_layer_norm": LayerNormParams(m.layer_norm),
        "transition1": LinearParams(m.linear_1),
        "transition2": LinearParams(m.linear_2),
    }

    OuterProductMeanParams = lambda o: {
        "layer_norm_input": LayerNormParams(o.layer_norm),
        "left_projection": LinearParams(o.linear_1),
        "right_projection": LinearParams(o.linear_2),
        "output_w": LinearWeightOPM(o.linear_out.weight),
        "output_b": LinearBias(o.linear_out.bias),
    }

    def EvoformerBlockParams(b, is_extra_msa=False):
        if is_extra_msa:
            col_att_name = "msa_column_global_attention"
            msa_col_att_params = MSAGlobalAttParams(b.msa_att_col)
        else:
            col_att_name = "msa_column_attention"
            msa_col_att_params = MSAColAttParams(b.msa_att_col)

        d = {
            "msa_row_attention_with_pair_bias": MSAAttPairBiasParams(
                b.msa_att_row
            ),
            col_att_name: msa_col_att_params,
            "msa_transition": MSATransitionParams(b.core.msa_transition),
            "outer_product_mean":
                OuterProductMeanParams(b.core.outer_product_mean),
            "triangle_multiplication_outgoing":
                TriMulOutParams(b.core.tri_mul_out),
            "triangle_multiplication_incoming":
                TriMulInParams(b.core.tri_mul_in),
            "triangle_attention_starting_node":
                TriAttParams(b.core.tri_att_start),
            "triangle_attention_ending_node":
                TriAttParams(b.core.tri_att_end),
            "pair_transition":
                PairTransitionParams(b.core.pair_transition),
        }

        return d

    ExtraMSABlockParams = partial(EvoformerBlockParams, is_extra_msa=True)

    FoldIterationParams = lambda sm: {
        "invariant_point_attention": IPAParams(sm.ipa),
        "attention_layer_norm": LayerNormParams(sm.layer_norm_ipa),
        "transition": LinearParams(sm.transition.layers[0].linear_1),
        "transition_1": LinearParams(sm.transition.layers[0].linear_2),
        "transition_2": LinearParams(sm.transition.layers[0].linear_3),
        "transition_layer_norm": LayerNormParams(sm.transition.layer_norm),
        "affine_update": LinearParams(sm.bb_update.linear),
        "rigid_sidechain": {
            "input_projection": LinearParams(sm.angle_resnet.linear_in),
            "input_projection_1": LinearParams(sm.angle_resnet.linear_initial),
            "resblock1": LinearParams(sm.angle_resnet.layers[0].linear_1),
            "resblock2": LinearParams(sm.angle_resnet.layers[0].linear_2),
            "resblock1_1": LinearParams(sm.angle_resnet.layers[1].linear_1),
            "resblock2_1": LinearParams(sm.angle_resnet.layers[1].linear_2),
            "unnormalized_angles": LinearParams(sm.angle_resnet.linear_out),
        },
    }

    ############################
    # translations dict overflow
    ############################

    tps_blocks = model.template_pair_stack.blocks
    tps_blocks_params = stacked(
        [TemplatePairBlockParams(b) for b in tps_blocks]
    )

    ems_blocks = model.extra_msa_stack.blocks
    ems_blocks_params = stacked([ExtraMSABlockParams(b) for b in ems_blocks])

    evo_blocks = model.evoformer.blocks
    evo_blocks_params = stacked([EvoformerBlockParams(b) for b in evo_blocks])

    translations = {
        "evoformer": {
            "preprocess_1d": LinearParams(model.input_embedder.linear_tf_m),
            "preprocess_msa": LinearParams(model.input_embedder.linear_msa_m),
            "left_single": LinearParams(model.input_embedder.linear_tf_z_i),
            "right_single": LinearParams(model.input_embedder.linear_tf_z_j),
            "prev_pos_linear": LinearParams(model.recycling_embedder.linear),
            "prev_msa_first_row_norm": LayerNormParams(
                model.recycling_embedder.layer_norm_m
            ),
            "prev_pair_norm": LayerNormParams(
                model.recycling_embedder.layer_norm_z
            ),
            "pair_activiations": LinearParams(
                model.input_embedder.linear_relpos
            ),
            "template_embedding": {
                "single_template_embedding": {
                    "embedding2d": LinearParams(
                        model.template_pair_embedder.linear
                    ),
                    "template_pair_stack": {
                        "__layer_stack_no_state": tps_blocks_params,
                    },
                    "output_layer_norm": LayerNormParams(
                        model.template_pair_stack.layer_norm
                    ),
                },
                "attention": AttentionParams(model.template_pointwise_att.mha),
            },
            "extra_msa_activations": LinearParams(
                model.extra_msa_embedder.linear
            ),
            "extra_msa_stack": ems_blocks_params,
            "template_single_embedding": LinearParams(
                model.template_angle_embedder.linear_1
            ),
            "template_projection": LinearParams(
                model.template_angle_embedder.linear_2
            ),
            "evoformer_iteration": evo_blocks_params,
            "single_activations": LinearParams(model.evoformer.linear),
        },
        "structure_module": {
            "single_layer_norm": LayerNormParams(
                model.structure_module.layer_norm_s
            ),
            "initial_projection": LinearParams(
                model.structure_module.linear_in
            ),
            "pair_layer_norm": LayerNormParams(
                model.structure_module.layer_norm_z
            ),
            "fold_iteration": FoldIterationParams(model.structure_module),
        },
        "predicted_lddt_head": {
            "input_layer_norm": LayerNormParams(
                model.aux_heads.plddt.layer_norm
            ),
            "act_0": LinearParams(model.aux_heads.plddt.linear_1),
            "act_1": LinearParams(model.aux_heads.plddt.linear_2),
            "logits": LinearParams(model.aux_heads.plddt.linear_3),
        },
        "distogram_head": {
            "half_logits": LinearParams(model.aux_heads.distogram.linear),
        },
        "experimentally_resolved_head": {
            "logits": LinearParams(
                model.aux_heads.experimentally_resolved.linear
            ),
        },
        "masked_msa_head": {
            "logits": LinearParams(model.aux_heads.masked_msa.linear),
        },
    }

    no_templ = [
        "model_3",
        "model_4",
        "model_5",
        "model_3_ptm",
        "model_4_ptm",
        "model_5_ptm",
    ]
    if version in no_templ:
        evo_dict = translations["evoformer"]
        keys = list(evo_dict.keys())
        for k in keys:
            if "template_" in k:
                evo_dict.pop(k)

    if "_ptm" in version:
        translations["predicted_aligned_error_head"] = {
            "logits": LinearParams(model.aux_heads.tm.linear)
        }

    # Flatten keys and insert missing key prefixes
    flat = _process_translations_dict(translations)

    # The archive is opened in a ``with`` block so its file handle is closed
    # once the weights have been copied (it was previously left open).
    with np.load(npz_path) as data:
        # Sanity check: every key the model expects must be in the archive.
        # Extra keys in the archive are tolerated.
        available = set(data.keys())
        incorrect = [k for k in flat if k not in available]
        assert len(incorrect) == 0, (
            f"Keys expected by the model but absent from the archive: "
            f"{incorrect}"
        )

        # Set weights
        assign(flat, data)
def is_main_process():
    """Return True when the ``LOCAL_RANK`` environment variable is 0 or unset."""
    return int(os.getenv("LOCAL_RANK", "0")) == 0


class PerformanceLoggingCallback(Callback):
    """Lightning callback that records per-step timestamps and logs
    throughput and latency statistics through ``dllogger``.
    """

    def __init__(self, log_file, global_batch_size, warmup_steps: int = 0, profile: bool = False):
        """
        Args:
            log_file: Destination passed to ``JSONStreamBackend``.
            global_batch_size: Numerator of the throughput computation.
            warmup_steps: Steps to skip before timestamps are recorded.
            profile: When True, the CUDA profiler is started at step
                ``warmup_steps`` and stopped at the end of training.
        """
        logger.init(backends=[JSONStreamBackend(Verbosity.VERBOSE, log_file), StdOutBackend(Verbosity.VERBOSE)])
        self.warmup_steps = warmup_steps
        self.global_batch_size = global_batch_size
        self.step = 0
        self.profile = profile
        self.timestamps = []

    def do_step(self):
        """Advance the step counter and record a timestamp after warm-up."""
        self.step += 1
        if self.profile and self.step == self.warmup_steps:
            profiler.start()
        if self.step > self.warmup_steps:
            self.timestamps.append(time.time())

    # ``dataloader_idx`` is defaulted so the hooks can be invoked both with
    # and without that trailing argument; it is not used here.
    def on_train_batch_start(self, trainer, pl_module, batch, batch_idx, dataloader_idx=0):
        self.do_step()

    def on_test_batch_start(self, trainer, pl_module, batch, batch_idx, dataloader_idx=0):
        self.do_step()

    def process_performance_stats(self, deltas):
        """Summarise step durations.

        Args:
            deltas: Non-empty NumPy array of step durations in seconds.

        Returns:
            Dict with ``throughput``, ``latency_mean`` and the
            ``latency_90`` / ``latency_95`` / ``latency_99`` percentiles,
            latencies in milliseconds, all rounded to 3 decimals.
        """
        def _round3(val):
            return round(val, 3)

        throughput_imgps = _round3(self.global_batch_size / np.mean(deltas))
        timestamps_ms = 1000 * deltas
        stats = {
            "throughput": throughput_imgps,
            "latency_mean": _round3(timestamps_ms.mean()),
        }
        for level in [90, 95, 99]:
            stats[f"latency_{level}"] = _round3(np.percentile(timestamps_ms, level))

        return stats

    def _log(self):
        """Log statistics on the main process; no-op without enough data."""
        if not is_main_process():
            return
        # At least two timestamps are needed for one step duration. Without
        # this guard the statistics would be computed on an empty array.
        if len(self.timestamps) < 2:
            return
        deltas = np.diff(np.asarray(self.timestamps))
        stats = self.process_performance_stats(deltas)
        logger.log(step=(), data=stats)
        logger.flush()

    def on_train_end(self, trainer, pl_module):
        if self.profile:
            profiler.stop()
        self._log()

    def on_epoch_end(self, trainer, pl_module):
        self._log()
def softmax_cross_entropy(logits, labels):
    """Cross entropy between ``labels`` and ``softmax(logits)``.

    Args:
        logits: Unnormalized scores; the class axis is the last one.
        labels: Target distribution (e.g. one-hot), broadcastable to
            ``logits``.

    Returns:
        Tensor with the class axis reduced away.
    """
    loss = -1 * torch.sum(
        labels * torch.nn.functional.log_softmax(logits, dim=-1),
        dim=-1,
    )
    return loss


def sigmoid_cross_entropy(logits, labels):
    """Element-wise binary cross entropy computed from logits.

    Uses ``logsigmoid`` rather than ``log(sigmoid(x))``. The latter
    underflows to ``log(0) = -inf`` for large ``|x|``, which yields ``inf``
    losses or ``0 * -inf = nan`` once multiplied by the labels.

    Args:
        logits: Unnormalized scores.
        labels: Targets in ``[0, 1]`` with a shape broadcastable to
            ``logits``.

    Returns:
        Tensor of per-element losses.
    """
    log_p = torch.nn.functional.logsigmoid(logits)
    log_not_p = torch.nn.functional.logsigmoid(-logits)
    loss = -labels * log_p - (1 - labels) * log_not_p
    return loss


def torsion_angle_loss(
    a,  # [*, N, 7, 2]
    a_gt,  # [*, N, 7, 2]
    a_alt_gt,  # [*, N, 7, 2]
    an_weight: float = 0.02,
):
    """Torsion angle loss with an angle-norm regularizer.

    The prediction is normalized to unit length, compared against both the
    ground truth and its alternative, and the smaller squared distance is
    kept. The deviation of the raw norm from 1 is added with weight
    ``an_weight``.

    Args:
        a: Predicted, unnormalized (sin, cos) pairs.
        a_gt: Ground-truth (sin, cos) pairs.
        a_alt_gt: Alternative ground-truth (sin, cos) pairs.
        an_weight: Weight of the angle-norm term (previously a hard-coded
            0.02, still the default).

    Returns:
        Loss with the last two remaining axes averaged away.
    """
    # [*, N, 7]
    norm = torch.norm(a, dim=-1)

    # [*, N, 7, 2]
    a = a / norm.unsqueeze(-1)

    # [*, N, 7]
    diff_norm_gt = torch.norm(a - a_gt, dim=-1)
    diff_norm_alt_gt = torch.norm(a - a_alt_gt, dim=-1)
    min_diff = torch.minimum(diff_norm_gt ** 2, diff_norm_alt_gt ** 2)

    # [*]
    l_torsion = torch.mean(min_diff, dim=(-1, -2))
    l_angle_norm = torch.mean(torch.abs(norm - 1), dim=(-1, -2))

    return l_torsion + an_weight * l_angle_norm
def compute_fape(
    pred_frames: Rigid,
    target_frames: Rigid,
    frames_mask: torch.Tensor,
    pred_positions: torch.Tensor,
    target_positions: torch.Tensor,
    positions_mask: torch.Tensor,
    length_scale: float,
    l1_clamp_distance: Optional[float] = None,
    eps: float = 1e-8,
    ignore_nan: bool = True,
) -> torch.Tensor:
    """
    Computes FAPE loss.

    Both point sets are expressed in the local coordinates of every frame
    (by applying the inverted frames); the per-(frame, point) distance
    between prediction and target is optionally clamped, scaled, masked
    and averaged.

    Args:
        pred_frames:
            [*, N_frames] Rigid object of predicted frames
        target_frames:
            [*, N_frames] Rigid object of ground truth frames
        frames_mask:
            [*, N_frames] binary mask for the frames
        pred_positions:
            [*, N_pts, 3] predicted atom positions
        target_positions:
            [*, N_pts, 3] ground truth positions
        positions_mask:
            [*, N_pts] positions mask
        length_scale:
            Length scale by which the loss is divided
        l1_clamp_distance:
            Cutoff above which distance errors are disregarded
        eps:
            Small value used to regularize denominators (also added
            under the square root of the distance)
        ignore_nan:
            If True, the masked errors are passed through
            torch.nan_to_num before averaging
    Returns:
        [*] loss tensor
    """
    # [*, N_frames, N_pts, 3]
    local_pred_pos = pred_frames.invert()[..., None].apply(
        pred_positions[..., None, :, :],
    )
    local_target_pos = target_frames.invert()[..., None].apply(
        target_positions[..., None, :, :],
    )

    # [*, N_frames, N_pts]; eps keeps the sqrt differentiable at zero error
    error_dist = torch.sqrt(
        torch.sum((local_pred_pos - local_target_pos) ** 2, dim=-1) + eps
    )

    if l1_clamp_distance is not None:
        error_dist = torch.clamp(error_dist, min=0, max=l1_clamp_distance)

    normed_error = error_dist / length_scale
    normed_error = normed_error * frames_mask[..., None]
    normed_error = normed_error * positions_mask[..., None, :]
    if ignore_nan:
        normed_error = torch.nan_to_num(normed_error)

    # FP16-friendly averaging. Roughly equivalent to:
    #
    # norm_factor = (
    #     torch.sum(frames_mask, dim=-1) *
    #     torch.sum(positions_mask, dim=-1)
    # )
    # normed_error = torch.sum(normed_error, dim=(-1, -2)) / (eps + norm_factor)
    #
    # ("roughly" because eps is necessarily duplicated in the latter)
    normed_error = torch.sum(normed_error, dim=-1)
    normed_error = (
        normed_error / (eps + torch.sum(frames_mask, dim=-1))[..., None]
    )
    normed_error = torch.sum(normed_error, dim=-1)
    normed_error = normed_error / (eps + torch.sum(positions_mask, dim=-1))
    return normed_error
def backbone_loss(
    backbone_rigid_tensor: torch.Tensor,
    backbone_rigid_mask: torch.Tensor,
    traj: torch.Tensor,
    use_clamped_fape: Optional[torch.Tensor] = None,
    clamp_distance: float = 10.0,
    loss_unit_distance: float = 10.0,
    eps: float = 1e-4,
    **kwargs,
) -> torch.Tensor:
    """Backbone FAPE between a predicted frame trajectory and ground truth.

    Args:
        backbone_rigid_tensor: Ground-truth backbone frames in 4x4 form.
        backbone_rigid_mask: Mask for the ground-truth frames; used for both
            the frame mask and the position mask.
        traj: Predicted frames in 7-component form.
        use_clamped_fape: Optional blending weight. When given, the result
            is ``clamped * w + unclamped * (1 - w)``.
        clamp_distance: Clamp passed to ``compute_fape`` for the clamped
            term.
        loss_unit_distance: Length scale passed to ``compute_fape``.
        eps: Regularizer passed to ``compute_fape``.
        **kwargs: Ignored; lets callers splat a larger dict.

    Returns:
        Scalar loss, averaged over all remaining dimensions.
    """
    traj_rigid = Rigid.from_tensor_7(traj)
    # Rebuild the predicted frames from rotation matrices only (no quats).
    predicted = Rigid(
        Rotation(rot_mats=traj_rigid.get_rots().get_rot_mats(), quats=None),
        traj_rigid.get_trans(),
    )

    # DISCREPANCY: DeepMind somehow gets a hold of a tensor_7 version of
    # backbone tensor, normalizes it, and then turns it back to a rotation
    # matrix. To avoid a potentially numerically unstable rotation matrix
    # to quaternion conversion, we just use the original rotation matrix
    # outright. This one hasn't been composed a bunch of times, though, so
    # it might be fine.
    target = Rigid.from_tensor_4x4(backbone_rigid_tensor)

    def _fape(clamp):
        # The ground truth gets a leading axis to broadcast against the
        # leading axis of the predicted trajectory.
        return compute_fape(
            predicted,
            target[None],
            backbone_rigid_mask[None],
            predicted.get_trans(),
            target[None].get_trans(),
            backbone_rigid_mask[None],
            l1_clamp_distance=clamp,
            length_scale=loss_unit_distance,
            eps=eps,
        )

    result = _fape(clamp_distance)
    if use_clamped_fape is not None:
        unclamped = _fape(None)
        result = result * use_clamped_fape + unclamped * (
            1 - use_clamped_fape
        )

    # Average over the batch dimension
    return torch.mean(result)
def sidechain_loss(
    sidechain_frames: torch.Tensor,
    sidechain_atom_pos: torch.Tensor,
    rigidgroups_gt_frames: torch.Tensor,
    rigidgroups_alt_gt_frames: torch.Tensor,
    rigidgroups_gt_exists: torch.Tensor,
    renamed_atom14_gt_positions: torch.Tensor,
    renamed_atom14_gt_exists: torch.Tensor,
    alt_naming_is_better: torch.Tensor,
    clamp_distance: float = 10.0,
    length_scale: float = 10.0,
    eps: float = 1e-4,
    **kwargs,
) -> torch.Tensor:
    """All-frame, all-atom FAPE for the last entry of the predictions.

    Args:
        sidechain_frames: Predicted frames in 4x4 form; only the last entry
            along the first axis is used.
        sidechain_atom_pos: Predicted atom positions; only the last entry
            along the first axis is used.
        rigidgroups_gt_frames: Ground-truth frames in 4x4 form.
        rigidgroups_alt_gt_frames: Alternative ground-truth frames.
        rigidgroups_gt_exists: Mask of existing ground-truth frames.
        renamed_atom14_gt_positions: Ground-truth atom positions.
        renamed_atom14_gt_exists: Mask of existing ground-truth atoms.
        alt_naming_is_better: Per-residue selector; 1 picks the alternative
            frames, 0 the regular ones.
        clamp_distance: Clamp passed to ``compute_fape``.
        length_scale: Length scale passed to ``compute_fape``.
        eps: Regularizer passed to ``compute_fape``.
        **kwargs: Ignored; lets callers splat a larger dict.

    Returns:
        The ``compute_fape`` result for the flattened frames and atoms.
    """
    use_alt = alt_naming_is_better[..., None, None, None]
    renamed_gt_frames = (
        1.0 - use_alt
    ) * rigidgroups_gt_frames + use_alt * rigidgroups_alt_gt_frames

    # Steamroll the inputs: merge the residue axis with the per-residue
    # frame/atom axis. ``reshape`` is used throughout (the original mixed it
    # with ``view``) so non-contiguous inputs are accepted as well.
    sidechain_frames = sidechain_frames[-1]
    batch_dims = sidechain_frames.shape[:-4]
    sidechain_frames = sidechain_frames.reshape(*batch_dims, -1, 4, 4)
    sidechain_frames = Rigid.from_tensor_4x4(sidechain_frames)
    renamed_gt_frames = renamed_gt_frames.reshape(*batch_dims, -1, 4, 4)
    renamed_gt_frames = Rigid.from_tensor_4x4(renamed_gt_frames)
    rigidgroups_gt_exists = rigidgroups_gt_exists.reshape(*batch_dims, -1)
    sidechain_atom_pos = sidechain_atom_pos[-1]
    sidechain_atom_pos = sidechain_atom_pos.reshape(*batch_dims, -1, 3)
    renamed_atom14_gt_positions = renamed_atom14_gt_positions.reshape(
        *batch_dims, -1, 3
    )
    renamed_atom14_gt_exists = renamed_atom14_gt_exists.reshape(
        *batch_dims, -1
    )

    fape = compute_fape(
        sidechain_frames,
        renamed_gt_frames,
        rigidgroups_gt_exists,
        sidechain_atom_pos,
        renamed_atom14_gt_positions,
        renamed_atom14_gt_exists,
        l1_clamp_distance=clamp_distance,
        length_scale=length_scale,
        eps=eps,
    )

    return fape
def fape_loss(
    out: Dict[str, torch.Tensor],
    batch: Dict[str, torch.Tensor],
    config: ml_collections.ConfigDict,
) -> torch.Tensor:
    """Weighted sum of the backbone and sidechain FAPE losses.

    Args:
        out: Model outputs; reads ``out["sm"]["frames"]``,
            ``out["sm"]["sidechain_frames"]`` and ``out["sm"]["positions"]``.
        batch: Feature dict, splatted into both loss functions.
        config: Config with ``backbone`` and ``sidechain`` sections, each
            providing ``weight`` plus keyword arguments for its loss.

    Returns:
        Scalar loss.
    """
    bb_loss = backbone_loss(
        traj=out["sm"]["frames"],
        **{**batch, **config.backbone},
    )

    sc_loss = sidechain_loss(
        out["sm"]["sidechain_frames"],
        out["sm"]["positions"],
        **{**batch, **config.sidechain},
    )

    loss = config.backbone.weight * bb_loss + config.sidechain.weight * sc_loss

    # Average over the batch dimension
    loss = torch.mean(loss)

    return loss


def supervised_chi_loss(
    angles_sin_cos: torch.Tensor,
    unnormalized_angles_sin_cos: torch.Tensor,
    aatype: torch.Tensor,
    seq_mask: torch.Tensor,
    chi_mask: torch.Tensor,
    chi_angles_sin_cos: torch.Tensor,
    chi_weight: float,
    angle_norm_weight: float,
    eps=1e-6,
    **kwargs,
) -> torch.Tensor:
    """
    Implements Algorithm 27 (torsionAngleLoss)

    Args:
        angles_sin_cos:
            [*, N, 7, 2] predicted angles
        unnormalized_angles_sin_cos:
            The same angles, but unnormalized
        aatype:
            [*, N] residue indices
        seq_mask:
            [*, N] sequence mask
        chi_mask:
            [*, N, 7] angle mask
        chi_angles_sin_cos:
            [*, N, 7, 2] ground truth angles
        chi_weight:
            Weight for the angle component of the loss
        angle_norm_weight:
            Weight for the normalization component of the loss
        eps:
            Small value added under the square root of the angle norm
    Returns:
        Scalar loss tensor (the final mean reduces all remaining
        dimensions)

    NOTE(review): the permutes below move axis 0 of the predictions behind
    the batch axes, which suggests the predictions carry an extra leading
    axis beyond the documented shape -- confirm against the caller.
    """
    pred_angles = angles_sin_cos[..., 3:, :]
    residue_type_one_hot = torch.nn.functional.one_hot(
        aatype,
        residue_constants.restype_num + 1,
    )
    # Per-residue lookup of which chi angles are pi-periodic. The output
    # keeps the batch axes ("->...ik"); the previous "->ik" summed the
    # lookup over every batch dimension, which is only correct for a
    # batch of size one.
    chi_pi_periodic = torch.einsum(
        "...ij,jk->...ik",
        residue_type_one_hot.type(angles_sin_cos.dtype),
        angles_sin_cos.new_tensor(residue_constants.chi_pi_periodic),
    )

    true_chi = chi_angles_sin_cos[None]

    # -1 for pi-periodic angles (flipping (sin, cos) is an equally valid
    # target), +1 otherwise.
    shifted_mask = (1 - 2 * chi_pi_periodic).unsqueeze(-1)
    true_chi_shifted = shifted_mask * true_chi
    sq_chi_error = torch.sum((true_chi - pred_angles) ** 2, dim=-1)
    sq_chi_error_shifted = torch.sum(
        (true_chi_shifted - pred_angles) ** 2, dim=-1
    )
    sq_chi_error = torch.minimum(sq_chi_error, sq_chi_error_shifted)
    # The ol' switcheroo
    sq_chi_error = sq_chi_error.permute(
        *range(len(sq_chi_error.shape))[1:-2], 0, -2, -1
    )
    sq_chi_loss = masked_mean(
        chi_mask[..., None, :, :], sq_chi_error, dim=(-1, -2, -3)
    )

    loss = chi_weight * sq_chi_loss

    angle_norm = torch.sqrt(
        torch.sum(unnormalized_angles_sin_cos ** 2, dim=-1) + eps
    )
    norm_error = torch.abs(angle_norm - 1.0)
    norm_error = norm_error.permute(
        *range(len(norm_error.shape))[1:-2], 0, -2, -1
    )
    angle_norm_loss = masked_mean(
        seq_mask[..., None, :, None], norm_error, dim=(-1, -2, -3)
    )

    loss = loss + angle_norm_weight * angle_norm_loss

    # Average over the batch dimension
    loss = torch.mean(loss)

    return loss
def compute_plddt(logits: torch.Tensor) -> torch.Tensor:
    """Expected lDDT on a 0-100 scale from per-bin logits.

    The last axis of ``logits`` enumerates equally wide bins covering
    [0, 1]; the result is the softmax-weighted mean of the bin centers
    times 100.
    """
    n_bins = logits.shape[-1]
    width = 1.0 / n_bins
    centers = torch.arange(
        start=0.5 * width, end=1.0, step=width, device=logits.device
    )
    bin_probs = torch.nn.functional.softmax(logits, dim=-1)
    # Give the centers one singleton axis per leading axis of the
    # probabilities so they line up with the bin axis.
    leading_ones = (1,) * len(bin_probs.shape[:-1])
    expected = torch.sum(
        bin_probs * centers.view(*leading_ones, *centers.shape),
        dim=-1,
    )
    return expected * 100


def lddt(
    all_atom_pred_pos: torch.Tensor,
    all_atom_positions: torch.Tensor,
    all_atom_mask: torch.Tensor,
    cutoff: float = 15.0,
    eps: float = 1e-10,
    per_residue: bool = True,
) -> torch.Tensor:
    """lDDT score between predicted and true point sets.

    Pairs are scored when their true distance is below ``cutoff``, both
    points are unmasked and the pair is off-diagonal. Each scored pair
    contributes the fraction of the thresholds 0.5, 1, 2 and 4 that its
    absolute distance error stays under.

    Args:
        all_atom_pred_pos: Predicted coordinates; last axis is xyz.
        all_atom_positions: True coordinates, same layout.
        all_atom_mask: Mask whose second-to-last axis enumerates the points
            (its last axis is expected to be a singleton so that it
            broadcasts against its own transpose).
        cutoff: Inclusion radius on the true distances.
        eps: Regularizer for the square roots and the normalization.
        per_residue: If True reduce over the last pair axis only, otherwise
            over both pair axes.

    Returns:
        Score tensor.
    """
    n_points = all_atom_mask.shape[-2]

    def _pairwise_dist(pos):
        deltas = pos[..., None, :] - pos[..., None, :, :]
        return torch.sqrt(eps + torch.sum(deltas ** 2, dim=-1))

    true_dists = _pairwise_dist(all_atom_positions)
    pred_dists = _pairwise_dist(all_atom_pred_pos)

    pair_weights = (
        (true_dists < cutoff)
        * all_atom_mask
        * permute_final_dims(all_atom_mask, (1, 0))
        * (1.0 - torch.eye(n_points, device=all_atom_mask.device))
    )

    abs_err = torch.abs(true_dists - pred_dists)

    hits = (
        (abs_err < 0.5).type(abs_err.dtype)
        + (abs_err < 1.0).type(abs_err.dtype)
        + (abs_err < 2.0).type(abs_err.dtype)
        + (abs_err < 4.0).type(abs_err.dtype)
    )
    hits = hits * 0.25

    reduce_dims = (-1,) if per_residue else (-2, -1)
    inv_norm = 1.0 / (eps + torch.sum(pair_weights, dim=reduce_dims))
    return inv_norm * (eps + torch.sum(pair_weights * hits, dim=reduce_dims))
def lddt_ca(
    all_atom_pred_pos: torch.Tensor,
    all_atom_positions: torch.Tensor,
    all_atom_mask: torch.Tensor,
    cutoff: float = 15.0,
    eps: float = 1e-10,
    per_residue: bool = True,
) -> torch.Tensor:
    """lDDT restricted to the CA atom of every residue.

    Selects the ``"CA"`` slot (per ``residue_constants.atom_order``) from
    the second-to-last axis of the coordinates and from the last axis of
    the mask, then defers to ``lddt``.
    """
    ca_pos = residue_constants.atom_order["CA"]
    all_atom_pred_pos = all_atom_pred_pos[..., ca_pos, :]
    all_atom_positions = all_atom_positions[..., ca_pos, :]
    all_atom_mask = all_atom_mask[..., ca_pos : (ca_pos + 1)]  # keep dim

    return lddt(
        all_atom_pred_pos,
        all_atom_positions,
        all_atom_mask,
        cutoff=cutoff,
        eps=eps,
        per_residue=per_residue,
    )


def lddt_loss(
    logits: torch.Tensor,
    all_atom_pred_pos: torch.Tensor,
    all_atom_positions: torch.Tensor,
    all_atom_mask: torch.Tensor,
    resolution: torch.Tensor,
    cutoff: float = 15.0,
    no_bins: int = 50,
    min_resolution: float = 0.1,
    max_resolution: float = 3.0,
    eps: float = 1e-10,
    **kwargs,
) -> torch.Tensor:
    """Cross-entropy loss for the binned per-residue CA-lDDT prediction.

    The target is the (detached) per-residue CA-lDDT of the predicted
    structure, discretized into ``no_bins`` equal bins. Examples whose
    ``resolution`` lies outside ``[min_resolution, max_resolution]``
    contribute zero.

    Args:
        logits: Per-residue bin logits; last axis has ``no_bins`` entries.
        all_atom_pred_pos: Predicted atom coordinates.
        all_atom_positions: True atom coordinates.
        all_atom_mask: Atom existence mask.
        resolution: Per-example resolution.
        cutoff: Inclusion radius forwarded to the lDDT computation.
        no_bins: Number of lDDT bins.
        min_resolution: Lower resolution bound (inclusive).
        max_resolution: Upper resolution bound (inclusive).
        eps: Regularizer for lDDT and for the masked mean.
        **kwargs: Ignored; lets callers splat a larger dict.

    Returns:
        Scalar loss.
    """
    # Reuse lddt_ca rather than repeating its CA slicing (an unused local
    # ``n`` from the duplicated code is gone as well).
    score = lddt_ca(
        all_atom_pred_pos,
        all_atom_positions,
        all_atom_mask,
        cutoff=cutoff,
        eps=eps,
    )

    # The target must not propagate gradients into the structure.
    score = score.detach()

    bin_index = torch.floor(score * no_bins).long()
    bin_index = torch.clamp(bin_index, max=(no_bins - 1))
    lddt_ca_one_hot = torch.nn.functional.one_hot(
        bin_index, num_classes=no_bins
    )

    errors = softmax_cross_entropy(logits, lddt_ca_one_hot)

    ca_pos = residue_constants.atom_order["CA"]
    ca_mask = all_atom_mask[..., ca_pos]
    loss = torch.sum(errors * ca_mask, dim=-1) / (
        eps + torch.sum(ca_mask, dim=-1)
    )

    loss = loss * (
        (resolution >= min_resolution) & (resolution <= max_resolution)
    )

    # Average over the batch dimension
    loss = torch.mean(loss)

    return loss
def distogram_loss(
    logits,
    pseudo_beta,
    pseudo_beta_mask,
    min_bin=2.3125,
    max_bin=21.6875,
    no_bins=64,
    eps=1e-6,
    **kwargs,
):
    """Cross-entropy loss of the distogram head.

    Pairwise squared distances between ``pseudo_beta`` positions are
    bucketed with ``no_bins - 1`` squared boundaries spaced linearly between
    ``min_bin`` and ``max_bin``; the bin index is the number of boundaries
    exceeded. The loss is the pair-masked mean cross entropy against
    ``logits``.

    Args:
        logits: Pairwise bin logits; last axis has ``no_bins`` entries.
        pseudo_beta: Point coordinates; last axis is xyz.
        pseudo_beta_mask: Per-point mask.
        min_bin: First bin boundary.
        max_bin: Last bin boundary.
        no_bins: Number of bins.
        eps: Regularizer for the mask normalization.
        **kwargs: Ignored; lets callers splat a larger dict.

    Returns:
        Scalar loss.
    """
    sq_breaks = torch.linspace(
        min_bin,
        max_bin,
        no_bins - 1,
        device=logits.device,
    )
    # Compare squared distances against squared boundaries: no sqrt needed.
    sq_breaks = sq_breaks ** 2

    offsets = pseudo_beta[..., None, :] - pseudo_beta[..., None, :, :]
    sq_dists = torch.sum(offsets ** 2, dim=-1, keepdim=True)

    bin_idx = torch.sum(sq_dists > sq_breaks, dim=-1)
    targets = torch.nn.functional.one_hot(bin_idx, no_bins)

    pair_errors = softmax_cross_entropy(logits, targets)

    pair_mask = pseudo_beta_mask[..., None] * pseudo_beta_mask[..., None, :]

    # FP16-friendly sum. Equivalent to:
    # mean = (torch.sum(errors * square_mask, dim=(-1, -2)) /
    #         (eps + torch.sum(square_mask, dim=(-1, -2))))
    normalizer = eps + torch.sum(pair_mask, dim=(-1, -2))
    row_sums = torch.sum(pair_errors * pair_mask, dim=-1)
    row_sums = row_sums / normalizer[..., None]
    per_example = torch.sum(row_sums, dim=-1)

    # Average over the batch dimensions
    return torch.mean(per_example)
Equivalent to: + # mean = (torch.sum(errors * square_mask, dim=(-1, -2)) / + # (eps + torch.sum(square_mask, dim=(-1, -2)))) + denom = eps + torch.sum(square_mask, dim=(-1, -2)) + mean = errors * square_mask + mean = torch.sum(mean, dim=-1) + mean = mean / denom[..., None] + mean = torch.sum(mean, dim=-1) + + # Average over the batch dimensions + mean = torch.mean(mean) + + return mean + + +def _calculate_bin_centers(boundaries: torch.Tensor): + step = boundaries[1] - boundaries[0] + bin_centers = boundaries + step / 2 + bin_centers = torch.cat( + [bin_centers, (bin_centers[-1] + step).unsqueeze(-1)], dim=0 + ) + return bin_centers + + +def _calculate_expected_aligned_error( + alignment_confidence_breaks: torch.Tensor, + aligned_distance_error_probs: torch.Tensor, +) -> Tuple[torch.Tensor, torch.Tensor]: + bin_centers = _calculate_bin_centers(alignment_confidence_breaks) + return ( + torch.sum(aligned_distance_error_probs * bin_centers, dim=-1), + bin_centers[-1], + ) + + +def compute_predicted_aligned_error( + logits: torch.Tensor, + max_bin: int = 31, + no_bins: int = 64, + **kwargs, +) -> Dict[str, torch.Tensor]: + """Computes aligned confidence metrics from logits. + + Args: + logits: [*, num_res, num_res, num_bins] the logits output from + PredictedAlignedErrorHead. + max_bin: Maximum bin value + no_bins: Number of bins + Returns: + aligned_confidence_probs: [*, num_res, num_res, num_bins] the predicted + aligned error probabilities over bins for each residue pair. + predicted_aligned_error: [*, num_res, num_res] the expected aligned distance + error for each pair of residues. + max_predicted_aligned_error: [*] the maximum predicted error possible. 
def compute_tm(
    logits: torch.Tensor,
    residue_weights: Optional[torch.Tensor] = None,
    max_bin: int = 31,
    no_bins: int = 64,
    eps: float = 1e-8,
    **kwargs,
) -> torch.Tensor:
    """Compute the predicted TM-score from aligned-error logits.

    Args:
        logits: [*, num_res, num_res, no_bins] output of the predicted
            aligned error head.
        residue_weights: [num_res] per-residue weights used for the
            expectation. Defaults to all ones.
        max_bin: Upper edge of the last finite error bin.
        no_bins: Number of error bins.
        eps: Small constant guarding the weight normalization.
        **kwargs: Ignored; lets callers splat a larger config dict.

    Returns:
        The predicted TM term of the alignment with the highest weighted
        score.
    """
    if residue_weights is None:
        residue_weights = logits.new_ones(logits.shape[-2])

    boundaries = torch.linspace(
        0, max_bin, steps=(no_bins - 1), device=logits.device
    )
    bin_centers = _calculate_bin_centers(boundaries)

    # d0 depends on the number of residues that actually count, i.e. the sum
    # of the weights, not on the (possibly padded) length of the logits.
    # Clipping at 19 keeps d0 positive for very short chains.
    num_res = residue_weights.sum()
    clipped_n = torch.clamp(num_res, min=19)
    d0 = 1.24 * (clipped_n - 15) ** (1.0 / 3) - 1.8

    probs = torch.nn.functional.softmax(logits, dim=-1)

    # TM-score contribution of an error equal to each bin center.
    tm_per_bin = 1.0 / (1 + (bin_centers ** 2) / (d0 ** 2))
    predicted_tm_term = torch.sum(probs * tm_per_bin, dim=-1)

    normed_residue_mask = residue_weights / (eps + residue_weights.sum())
    per_alignment = torch.sum(predicted_tm_term * normed_residue_mask, dim=-1)

    # Pick the alignment with the best weighted score; ties resolve to the
    # first maximal entry.
    weighted = per_alignment * residue_weights
    argmax = (weighted == torch.max(weighted)).nonzero()[0]
    return per_alignment[tuple(argmax)]
def between_residue_bond_loss(
    pred_atom_positions: torch.Tensor,  # (*, N, 37/14, 3)
    pred_atom_mask: torch.Tensor,  # (*, N, 37/14)
    residue_index: torch.Tensor,  # (*, N)
    aatype: torch.Tensor,  # (*, N)
    tolerance_factor_soft=12.0,
    tolerance_factor_hard=12.0,
    eps=1e-6,
) -> Dict[str, torch.Tensor]:
    """Flat-bottom loss to penalize structural violations between residues.

    This is a loss penalizing any violation of the geometry around the peptide
    bond between consecutive amino acids. This loss corresponds to
    Jumper et al. (2021) Suppl. Sec. 1.9.11, eq 44, 45.

    Three terms are computed for every pair of consecutive residues
    (i, i + 1): the C(i)-N(i+1) bond length, the cosine of the
    CA(i)-C(i)-N(i+1) angle and the cosine of the C(i)-N(i+1)-CA(i+1) angle.
    Pairs whose residue indices do not differ by exactly one, or whose
    relevant atoms are masked out, do not contribute.

    Args:
        pred_atom_positions: Atom positions in atom37/14 representation
        pred_atom_mask: Atom mask in atom37/14 representation
        residue_index: Residue index for given amino acid, this is assumed to be
            monotonically increasing.
        aatype: Amino acid type of given residue
        tolerance_factor_soft: soft tolerance factor measured in standard deviations
            of pdb distributions
        tolerance_factor_hard: hard tolerance factor measured in standard deviations
            of pdb distributions
        eps: Small constant added under square roots and to mask sums.

    Returns:
        Dict containing:
            * 'c_n_loss_mean': Loss for peptide bond length violations
            * 'ca_c_n_loss_mean': Loss for violations of bond angle around C spanned
                by CA, C, N
            * 'c_n_ca_loss_mean': Loss for violations of bond angle around N spanned
                by C, N, CA
            * 'per_residue_loss_sum': sum of all losses for each residue
            * 'per_residue_violation_mask': mask denoting all residues with violation
                present.
    """
    # Get the positions of the relevant backbone atoms.
    # Atom slots 0, 1, 2 are read as N, CA, C respectively. "this" is
    # residue i and "next" is residue i + 1, so every tensor below has
    # N - 1 entries along the residue axis.
    this_ca_pos = pred_atom_positions[..., :-1, 1, :]
    this_ca_mask = pred_atom_mask[..., :-1, 1]
    this_c_pos = pred_atom_positions[..., :-1, 2, :]
    this_c_mask = pred_atom_mask[..., :-1, 2]
    next_n_pos = pred_atom_positions[..., 1:, 0, :]
    next_n_mask = pred_atom_mask[..., 1:, 0]
    next_ca_pos = pred_atom_positions[..., 1:, 1, :]
    next_ca_mask = pred_atom_mask[..., 1:, 1]
    # True only where the two residues are sequence neighbours.
    has_no_gap_mask = (residue_index[..., 1:] - residue_index[..., :-1]) == 1.0

    # Compute loss for the C--N bond.
    c_n_bond_length = torch.sqrt(
        eps + torch.sum((this_c_pos - next_n_pos) ** 2, dim=-1)
    )

    # The C-N bond to proline has slightly different length because of the ring.
    # Entry [0] of each constant is used for non-proline, entry [1] for proline.
    next_is_proline = aatype[..., 1:] == residue_constants.resname_to_idx["PRO"]
    gt_length = (
        ~next_is_proline
    ) * residue_constants.between_res_bond_length_c_n[
        0
    ] + next_is_proline * residue_constants.between_res_bond_length_c_n[
        1
    ]
    gt_stddev = (
        ~next_is_proline
    ) * residue_constants.between_res_bond_length_stddev_c_n[
        0
    ] + next_is_proline * residue_constants.between_res_bond_length_stddev_c_n[
        1
    ]
    # sqrt(eps + x^2) is a smooth stand-in for |x|.
    c_n_bond_length_error = torch.sqrt(eps + (c_n_bond_length - gt_length) ** 2)
    # Flat bottom: no penalty inside the soft tolerance band.
    c_n_loss_per_residue = torch.nn.functional.relu(
        c_n_bond_length_error - tolerance_factor_soft * gt_stddev
    )
    mask = this_c_mask * next_n_mask * has_no_gap_mask
    c_n_loss = torch.sum(mask * c_n_loss_per_residue, dim=-1) / (
        torch.sum(mask, dim=-1) + eps
    )
    c_n_violation_mask = mask * (
        c_n_bond_length_error > (tolerance_factor_hard * gt_stddev)
    )

    # Compute loss for the angles.
    ca_c_bond_length = torch.sqrt(
        eps + torch.sum((this_ca_pos - this_c_pos) ** 2, dim=-1)
    )
    n_ca_bond_length = torch.sqrt(
        eps + torch.sum((next_n_pos - next_ca_pos) ** 2, dim=-1)
    )

    # Unit vectors along the three bonds; the angle terms below work on the
    # cosine (dot product) rather than on the angle itself.
    c_ca_unit_vec = (this_ca_pos - this_c_pos) / ca_c_bond_length[..., None]
    c_n_unit_vec = (next_n_pos - this_c_pos) / c_n_bond_length[..., None]
    n_ca_unit_vec = (next_ca_pos - next_n_pos) / n_ca_bond_length[..., None]

    ca_c_n_cos_angle = torch.sum(c_ca_unit_vec * c_n_unit_vec, dim=-1)
    gt_angle = residue_constants.between_res_cos_angles_ca_c_n[0]
    # NOTE(review): this takes the C-N bond-length stddev for an angle term,
    # whereas the C-N-CA term below uses between_res_cos_angles_c_n_ca[1].
    # Looks like it may be inherited from the reference implementation --
    # confirm before changing, since it alters the loss values.
    gt_stddev = residue_constants.between_res_bond_length_stddev_c_n[0]
    ca_c_n_cos_angle_error = torch.sqrt(
        eps + (ca_c_n_cos_angle - gt_angle) ** 2
    )
    ca_c_n_loss_per_residue = torch.nn.functional.relu(
        ca_c_n_cos_angle_error - tolerance_factor_soft * gt_stddev
    )
    mask = this_ca_mask * this_c_mask * next_n_mask * has_no_gap_mask
    ca_c_n_loss = torch.sum(mask * ca_c_n_loss_per_residue, dim=-1) / (
        torch.sum(mask, dim=-1) + eps
    )
    ca_c_n_violation_mask = mask * (
        ca_c_n_cos_angle_error > (tolerance_factor_hard * gt_stddev)
    )

    # The C->N vector is negated so both vectors point away from N.
    c_n_ca_cos_angle = torch.sum((-c_n_unit_vec) * n_ca_unit_vec, dim=-1)
    gt_angle = residue_constants.between_res_cos_angles_c_n_ca[0]
    gt_stddev = residue_constants.between_res_cos_angles_c_n_ca[1]
    c_n_ca_cos_angle_error = torch.sqrt(
        eps + torch.square(c_n_ca_cos_angle - gt_angle)
    )
    c_n_ca_loss_per_residue = torch.nn.functional.relu(
        c_n_ca_cos_angle_error - tolerance_factor_soft * gt_stddev
    )
    mask = this_c_mask * next_n_mask * next_ca_mask * has_no_gap_mask
    c_n_ca_loss = torch.sum(mask * c_n_ca_loss_per_residue, dim=-1) / (
        torch.sum(mask, dim=-1) + eps
    )
    c_n_ca_violation_mask = mask * (
        c_n_ca_cos_angle_error > (tolerance_factor_hard * gt_stddev)
    )

    # Compute a per residue loss (equally distribute the loss to both
    # neighbouring residues).
    # Padding once on the right and once on the left maps the N - 1 pair
    # values back onto N residues.
    per_residue_loss_sum = (
        c_n_loss_per_residue + ca_c_n_loss_per_residue + c_n_ca_loss_per_residue
    )
    per_residue_loss_sum = 0.5 * (
        torch.nn.functional.pad(per_residue_loss_sum, (0, 1))
        + torch.nn.functional.pad(per_residue_loss_sum, (1, 0))
    )

    # Compute hard violations.
    # A pair is flagged if any of the three terms is violated ...
    violation_mask = torch.max(
        torch.stack(
            [c_n_violation_mask, ca_c_n_violation_mask, c_n_ca_violation_mask],
            dim=-2,
        ),
        dim=-2,
    )[0]
    # ... and a residue is flagged if either of its two pairs is.
    violation_mask = torch.maximum(
        torch.nn.functional.pad(violation_mask, (0, 1)),
        torch.nn.functional.pad(violation_mask, (1, 0)),
    )

    return {
        "c_n_loss_mean": c_n_loss,
        "ca_c_n_loss_mean": ca_c_n_loss,
        "c_n_ca_loss_mean": c_n_ca_loss,
        "per_residue_loss_sum": per_residue_loss_sum,
        "per_residue_violation_mask": violation_mask,
    }
+ + This is a loss penalizing any steric clashes due to non bonded atoms in + different peptides coming too close. This loss corresponds to the part with + different residues of + Jumper et al. (2021) Suppl. Sec. 1.9.11, eq 46. + + Args: + atom14_pred_positions: Predicted positions of atoms in + global prediction frame + atom14_atom_exists: Mask denoting whether atom at positions exists for given + amino acid type + atom14_atom_radius: Van der Waals radius for each atom. + residue_index: Residue index for given amino acid. + overlap_tolerance_soft: Soft tolerance factor. + overlap_tolerance_hard: Hard tolerance factor. + + Returns: + Dict containing: + * 'mean_loss': average clash loss + * 'per_atom_loss_sum': sum of all clash losses per atom, shape (N, 14) + * 'per_atom_clash_mask': mask whether atom clashes with any other atom + shape (N, 14) + """ + fp_type = atom14_pred_positions.dtype + + # Create the distance matrix. + # (N, N, 14, 14) + dists = torch.sqrt( + eps + + torch.sum( + ( + atom14_pred_positions[..., :, None, :, None, :] + - atom14_pred_positions[..., None, :, None, :, :] + ) + ** 2, + dim=-1, + ) + ) + + # Create the mask for valid distances. + # shape (N, N, 14, 14) + dists_mask = ( + atom14_atom_exists[..., :, None, :, None] + * atom14_atom_exists[..., None, :, None, :] + ).type(fp_type) + + # Mask out all the duplicate entries in the lower triangular matrix. + # Also mask out the diagonal (atom-pairs from the same residue) -- these atoms + # are handled separately. + dists_mask = dists_mask * ( + residue_index[..., :, None, None, None] + < residue_index[..., None, :, None, None] + ) + + # Backbone C--N bond between subsequent residues is no clash. 
+ c_one_hot = torch.nn.functional.one_hot( + residue_index.new_tensor(2), num_classes=14 + ) + c_one_hot = c_one_hot.reshape( + *((1,) * len(residue_index.shape[:-1])), *c_one_hot.shape + ) + c_one_hot = c_one_hot.type(fp_type) + n_one_hot = torch.nn.functional.one_hot( + residue_index.new_tensor(0), num_classes=14 + ) + n_one_hot = n_one_hot.reshape( + *((1,) * len(residue_index.shape[:-1])), *n_one_hot.shape + ) + n_one_hot = n_one_hot.type(fp_type) + + neighbour_mask = ( + residue_index[..., :, None, None, None] + 1 + ) == residue_index[..., None, :, None, None] + c_n_bonds = ( + neighbour_mask + * c_one_hot[..., None, None, :, None] + * n_one_hot[..., None, None, None, :] + ) + dists_mask = dists_mask * (1.0 - c_n_bonds) + + # Disulfide bridge between two cysteines is no clash. + cys = residue_constants.restype_name_to_atom14_names["CYS"] + cys_sg_idx = cys.index("SG") + cys_sg_idx = residue_index.new_tensor(cys_sg_idx) + cys_sg_idx = cys_sg_idx.reshape( + *((1,) * len(residue_index.shape[:-1])), 1 + ).squeeze(-1) + cys_sg_one_hot = torch.nn.functional.one_hot(cys_sg_idx, num_classes=14) + disulfide_bonds = ( + cys_sg_one_hot[..., None, None, :, None] + * cys_sg_one_hot[..., None, None, None, :] + ) + dists_mask = dists_mask * (1.0 - disulfide_bonds) + + # Compute the lower bound for the allowed distances. + # shape (N, N, 14, 14) + dists_lower_bound = dists_mask * ( + atom14_atom_radius[..., :, None, :, None] + + atom14_atom_radius[..., None, :, None, :] + ) + + # Compute the error. + # shape (N, N, 14, 14) + dists_to_low_error = dists_mask * torch.nn.functional.relu( + dists_lower_bound - overlap_tolerance_soft - dists + ) + + # Compute the mean loss. + # shape () + mean_loss = torch.sum(dists_to_low_error) / (1e-6 + torch.sum(dists_mask)) + + # Compute the per atom loss sum. + # shape (N, 14) + per_atom_loss_sum = torch.sum(dists_to_low_error, dim=(-4, -2)) + torch.sum( + dists_to_low_error, axis=(-3, -1) + ) + + # Compute the hard clash mask. 
def within_residue_violations(
    atom14_pred_positions: torch.Tensor,
    atom14_atom_exists: torch.Tensor,
    atom14_dists_lower_bound: torch.Tensor,
    atom14_dists_upper_bound: torch.Tensor,
    tighten_bounds_for_loss=0.0,
    eps=1e-10,
) -> Dict[str, torch.Tensor]:
    """Penalize atom pairs inside one residue that leave their distance bounds.

    This is the same-residue part of Jumper et al. (2021) Suppl. Sec. 1.9.11,
    eq 46.

    Args:
        atom14_pred_positions ([*, N, 14, 3]):
            Predicted atom positions in the global prediction frame.
        atom14_atom_exists ([*, N, 14]):
            Mask denoting which atom slots exist for the residue type.
        atom14_dists_lower_bound:
            Lower bound on allowed distances; must broadcast against the
            [*, N, 14, 14] pairwise distance matrix.
        atom14_dists_upper_bound:
            Upper bound on allowed distances; same broadcasting requirement.
        tighten_bounds_for_loss:
            Margin by which both bounds are tightened for the loss only.
        eps:
            Small constant added under the square root.

    Returns:
        Dict containing:
            * 'per_atom_loss_sum' ([*, N, 14]):
                sum of all violation losses touching each atom
            * 'per_atom_violations' ([*, N, 14]):
                mask of atoms taking part in at least one violating pair
    """
    # Valid pairs: both atoms exist and they are not the same atom.
    off_diagonal = 1.0 - torch.eye(14, device=atom14_atom_exists.device)
    pair_mask = (
        atom14_atom_exists[..., :, None]
        * atom14_atom_exists[..., None, :]
        * off_diagonal
    )

    # Pairwise distances between the 14 atom slots of each residue.
    deltas = (
        atom14_pred_positions[..., :, None, :]
        - atom14_pred_positions[..., None, :, :]
    )
    dists = torch.sqrt(eps + torch.sum(deltas ** 2, dim=-1))

    # Flat-bottom penalty outside the (optionally tightened) bounds.
    too_close = torch.nn.functional.relu(
        atom14_dists_lower_bound + tighten_bounds_for_loss - dists
    )
    too_far = torch.nn.functional.relu(
        dists - (atom14_dists_upper_bound - tighten_bounds_for_loss)
    )
    pair_loss = pair_mask * (too_close + too_far)

    # Each pair is charged to both of its atoms.
    per_atom_loss_sum = pair_loss.sum(dim=-2) + pair_loss.sum(dim=-1)

    # Hard violations are judged against the untightened bounds.
    out_of_bounds = (dists < atom14_dists_lower_bound) | (
        dists > atom14_dists_upper_bound
    )
    violations = pair_mask * out_of_bounds
    per_atom_violations = torch.maximum(
        violations.max(dim=-2)[0], violations.max(dim=-1)[0]
    )

    return {
        "per_atom_loss_sum": per_atom_loss_sum,
        "per_atom_violations": per_atom_violations,
    }
+ connection_violations = between_residue_bond_loss( + pred_atom_positions=atom14_pred_positions, + pred_atom_mask=batch["atom14_atom_exists"], + residue_index=batch["residue_index"], + aatype=batch["aatype"], + tolerance_factor_soft=violation_tolerance_factor, + tolerance_factor_hard=violation_tolerance_factor, + ) + + # Compute the Van der Waals radius for every atom + # (the first letter of the atom name is the element type). + # Shape: (N, 14). + atomtype_radius = [ + residue_constants.van_der_waals_radius[name[0]] + for name in residue_constants.atom_types + ] + atomtype_radius = atom14_pred_positions.new_tensor(atomtype_radius) + atom14_atom_radius = ( + batch["atom14_atom_exists"] + * atomtype_radius[batch["residx_atom14_to_atom37"]] + ) + + # Compute the between residue clash loss. + between_residue_clashes = between_residue_clash_loss( + atom14_pred_positions=atom14_pred_positions, + atom14_atom_exists=batch["atom14_atom_exists"], + atom14_atom_radius=atom14_atom_radius, + residue_index=batch["residue_index"], + overlap_tolerance_soft=clash_overlap_tolerance, + overlap_tolerance_hard=clash_overlap_tolerance, + ) + + # Compute all within-residue violations (clashes, + # bond length and angle violations). 
+ restype_atom14_bounds = residue_constants.make_atom14_dists_bounds( + overlap_tolerance=clash_overlap_tolerance, + bond_length_tolerance_factor=violation_tolerance_factor, + ) + atom14_atom_exists = batch["atom14_atom_exists"] + atom14_dists_lower_bound = atom14_pred_positions.new_tensor( + restype_atom14_bounds["lower_bound"] + )[batch["aatype"]] + atom14_dists_upper_bound = atom14_pred_positions.new_tensor( + restype_atom14_bounds["upper_bound"] + )[batch["aatype"]] + residue_violations = within_residue_violations( + atom14_pred_positions=atom14_pred_positions, + atom14_atom_exists=batch["atom14_atom_exists"], + atom14_dists_lower_bound=atom14_dists_lower_bound, + atom14_dists_upper_bound=atom14_dists_upper_bound, + tighten_bounds_for_loss=0.0, + ) + + # Combine them to a single per-residue violation mask (used later for LDDT). + per_residue_violations_mask = torch.max( + torch.stack( + [ + connection_violations["per_residue_violation_mask"], + torch.max( + between_residue_clashes["per_atom_clash_mask"], dim=-1 + )[0], + torch.max(residue_violations["per_atom_violations"], dim=-1)[0], + ], + dim=-1, + ), + dim=-1, + )[0] + + return { + "between_residues": { + "bonds_c_n_loss_mean": connection_violations["c_n_loss_mean"], # () + "angles_ca_c_n_loss_mean": connection_violations[ + "ca_c_n_loss_mean" + ], # () + "angles_c_n_ca_loss_mean": connection_violations[ + "c_n_ca_loss_mean" + ], # () + "connections_per_residue_loss_sum": connection_violations[ + "per_residue_loss_sum" + ], # (N) + "connections_per_residue_violation_mask": connection_violations[ + "per_residue_violation_mask" + ], # (N) + "clashes_mean_loss": between_residue_clashes["mean_loss"], # () + "clashes_per_atom_loss_sum": between_residue_clashes[ + "per_atom_loss_sum" + ], # (N, 14) + "clashes_per_atom_clash_mask": between_residue_clashes[ + "per_atom_clash_mask" + ], # (N, 14) + }, + "within_residues": { + "per_atom_loss_sum": residue_violations[ + "per_atom_loss_sum" + ], # (N, 14) + 
"per_atom_violations": residue_violations[ + "per_atom_violations" + ], # (N, 14), + }, + "total_per_residue_violations_mask": per_residue_violations_mask, # (N) + } + + +def find_structural_violations_np( + batch: Dict[str, np.ndarray], + atom14_pred_positions: np.ndarray, + config: ml_collections.ConfigDict, +) -> Dict[str, np.ndarray]: + to_tensor = lambda x: torch.tensor(x) + batch = tree_map(to_tensor, batch, np.ndarray) + atom14_pred_positions = to_tensor(atom14_pred_positions) + + out = find_structural_violations(batch, atom14_pred_positions, **config) + + to_np = lambda x: np.array(x) + np_out = tensor_tree_map(to_np, out) + + return np_out + + +def extreme_ca_ca_distance_violations( + pred_atom_positions: torch.Tensor, # (N, 37(14), 3) + pred_atom_mask: torch.Tensor, # (N, 37(14)) + residue_index: torch.Tensor, # (N) + max_angstrom_tolerance=1.5, + eps=1e-6, +) -> torch.Tensor: + """Counts residues whose Ca is a large distance from its neighbour. + + Measures the fraction of CA-CA pairs between consecutive amino acids that are + more than 'max_angstrom_tolerance' apart. + + Args: + pred_atom_positions: Atom positions in atom37/14 representation + pred_atom_mask: Atom mask in atom37/14 representation + residue_index: Residue index for given amino acid, this is assumed to be + monotonically increasing. + max_angstrom_tolerance: Maximum distance allowed to not count as violation. + Returns: + Fraction of consecutive CA-CA pairs with violation. 
def compute_violation_metrics(
    batch: Dict[str, torch.Tensor],
    atom14_pred_positions: torch.Tensor,  # (N, 14, 3)
    violations: Dict[str, torch.Tensor],
) -> Dict[str, torch.Tensor]:
    """Summarize structural violations as masked per-sequence fractions.

    Args:
        batch: Feature dict; reads "atom14_atom_exists", "residue_index" and
            "seq_mask".
        atom14_pred_positions: Predicted atom14 positions.
        violations: Nested dict with the "between_residues",
            "within_residues" and "total_per_residue_violations_mask"
            entries read below.

    Returns:
        Dict with one metric per violation category, each averaged over the
        residues selected by ``batch["seq_mask"]``.
    """
    ca_ca_metric = extreme_ca_ca_distance_violations(
        pred_atom_positions=atom14_pred_positions,
        pred_atom_mask=batch["atom14_atom_exists"],
        residue_index=batch["residue_index"],
    )

    def _seq_mean(per_residue_value):
        # Average a per-residue quantity over the residues in seq_mask.
        return masked_mean(
            mask=batch["seq_mask"], value=per_residue_value, dim=-1
        )

    def _any_atom(per_atom_mask):
        # A residue counts as soon as any one of its atoms is flagged.
        return torch.max(per_atom_mask, dim=-1)[0]

    between = violations["between_residues"]
    metrics = {"violations_extreme_ca_ca_distance": ca_ca_metric}
    metrics["violations_between_residue_bond"] = _seq_mean(
        between["connections_per_residue_violation_mask"]
    )
    metrics["violations_between_residue_clash"] = _seq_mean(
        _any_atom(between["clashes_per_atom_clash_mask"])
    )
    metrics["violations_within_residue"] = _seq_mean(
        _any_atom(violations["within_residues"]["per_atom_violations"])
    )
    metrics["violations_per_residue"] = _seq_mean(
        violations["total_per_residue_violations_mask"]
    )
    return metrics
def violation_loss(
    violations: Dict[str, torch.Tensor],
    atom14_atom_exists: torch.Tensor,
    eps=1e-6,
    **kwargs,
) -> torch.Tensor:
    """Combine the structural violation terms into a single scalar loss.

    The between-residue and within-residue per-atom losses are summed over
    everything and divided by the number of existing atoms; the three
    bond/angle means are then added on top.

    Args:
        violations: Nested dict with "between_residues" and
            "within_residues" entries.
        atom14_atom_exists: Mask of existing atoms, summed to get the
            normalizer.
        eps: Small constant guarding the division.
        **kwargs: Ignored; lets callers splat a full batch dict.

    Returns:
        The total violation loss.
    """
    between = violations["between_residues"]
    within = violations["within_residues"]

    atom_count = torch.sum(atom14_atom_exists)
    per_atom_total = (
        between["clashes_per_atom_loss_sum"] + within["per_atom_loss_sum"]
    )
    clash_term = torch.sum(per_atom_total) / (eps + atom_count)

    return (
        between["bonds_c_n_loss_mean"]
        + between["angles_ca_c_n_loss_mean"]
        + between["angles_c_n_ca_loss_mean"]
        + clash_term
    )
+ atom14_pred_positions: Array of atom positions in global frame with shape + Returns: + Dictionary containing: + alt_naming_is_better: Array with 1.0 where alternative swap is better. + renamed_atom14_gt_positions: Array of optimal ground truth positions + after renaming swaps are performed. + renamed_atom14_gt_exists: Mask after renaming swap is performed. + """ + + pred_dists = torch.sqrt( + eps + + torch.sum( + ( + atom14_pred_positions[..., None, :, None, :] + - atom14_pred_positions[..., None, :, None, :, :] + ) + ** 2, + dim=-1, + ) + ) + + atom14_gt_positions = batch["atom14_gt_positions"] + gt_dists = torch.sqrt( + eps + + torch.sum( + ( + atom14_gt_positions[..., None, :, None, :] + - atom14_gt_positions[..., None, :, None, :, :] + ) + ** 2, + dim=-1, + ) + ) + + atom14_alt_gt_positions = batch["atom14_alt_gt_positions"] + alt_gt_dists = torch.sqrt( + eps + + torch.sum( + ( + atom14_alt_gt_positions[..., None, :, None, :] + - atom14_alt_gt_positions[..., None, :, None, :, :] + ) + ** 2, + dim=-1, + ) + ) + + lddt = torch.sqrt(eps + (pred_dists - gt_dists) ** 2) + alt_lddt = torch.sqrt(eps + (pred_dists - alt_gt_dists) ** 2) + + atom14_gt_exists = batch["atom14_gt_exists"] + atom14_atom_is_ambiguous = batch["atom14_atom_is_ambiguous"] + mask = ( + atom14_gt_exists[..., None, :, None] + * atom14_atom_is_ambiguous[..., None, :, None] + * atom14_gt_exists[..., None, :, None, :] + * (1.0 - atom14_atom_is_ambiguous[..., None, :, None, :]) + ) + + per_res_lddt = torch.sum(mask * lddt, dim=(-1, -2, -3)) + alt_per_res_lddt = torch.sum(mask * alt_lddt, dim=(-1, -2, -3)) + + fp_type = atom14_pred_positions.dtype + alt_naming_is_better = (alt_per_res_lddt < per_res_lddt).type(fp_type) + + renamed_atom14_gt_positions = ( + 1.0 - alt_naming_is_better[..., None, None] + ) * atom14_gt_positions + alt_naming_is_better[ + ..., None, None + ] * atom14_alt_gt_positions + + renamed_atom14_gt_mask = ( + 1.0 - alt_naming_is_better[..., None] + ) * atom14_gt_exists + 
def experimentally_resolved_loss(
    logits: torch.Tensor,
    atom37_atom_exists: torch.Tensor,
    all_atom_mask: torch.Tensor,
    resolution: torch.Tensor,
    min_resolution: float,
    max_resolution: float,
    eps: float = 1e-8,
    **kwargs,
) -> torch.Tensor:
    """Cross-entropy loss for the "experimentally resolved" atom prediction.

    Args:
        logits: Per-atom logits; same shape as ``all_atom_mask``.
        atom37_atom_exists: Mask of atoms that exist for each residue type;
            only these contribute to the loss.
        all_atom_mask: Target mask of atoms resolved in the ground truth.
        resolution: Per-example resolution, compared against the two bounds.
        min_resolution: Examples below this resolution get zero loss.
        max_resolution: Examples above this resolution get zero loss.
        eps: Small constant guarding the normalization.
        **kwargs: Ignored; lets callers splat a full batch/config dict.

    Returns:
        Scalar loss averaged over the leading (batch) dimensions.
    """
    errors = sigmoid_cross_entropy(logits, all_atom_mask)

    # Sum over atoms first, then normalize, then sum over residues. The
    # denominator has had both trailing axes reduced away, so it needs a
    # trailing axis to line up with the per-residue sums; without it a
    # batched [B, N] loss would be divided by a [B] tensor along the wrong
    # axis.
    loss = torch.sum(errors * atom37_atom_exists, dim=-1)
    denom = eps + torch.sum(atom37_atom_exists, dim=(-1, -2))
    loss = loss / denom[..., None]
    loss = torch.sum(loss, dim=-1)

    # Only examples inside the resolution window are trained on.
    loss = loss * (
        (resolution >= min_resolution) & (resolution <= max_resolution)
    )

    loss = torch.mean(loss)

    return loss
def compute_drmsd(structure_1, structure_2, mask=None):
    """Compute the distance-based RMSD (dRMSD) between two structures.

    The dRMSD compares the two pairwise distance matrices, so it is invariant
    to rigid motions of either structure.

    Args:
        structure_1: [*, N, 3] coordinates.
        structure_2: [*, N, 3] coordinates.
        mask: Optional [*, N] mask of valid points. Only pairs where both
            points are valid contribute, and each batch element is
            normalized by its own number of valid points.

    Returns:
        [*] tensor of dRMSD values; zero wherever fewer than two points are
        available.
    """
    def _pairwise_dists(coords):
        # [*, N, N] Euclidean distance matrix.
        diff = coords[..., :, None, :] - coords[..., None, :, :]
        return torch.sqrt(torch.sum(diff ** 2, dim=-1))

    sq_err = (_pairwise_dists(structure_1) - _pairwise_dists(structure_2)) ** 2

    if mask is None:
        n = sq_err.shape[-1]
        total = torch.sum(sq_err, dim=(-1, -2))
        if n <= 1:
            return torch.zeros_like(total)
        return torch.sqrt(total * (1 / (n * (n - 1))))

    # Mask pairs rather than coordinates: zeroing coordinates would leave
    # spurious valid-to-origin distances in the sum.
    mask = mask.to(sq_err.dtype)
    pair_mask = mask[..., :, None] * mask[..., None, :]
    total = torch.sum(sq_err * pair_mask, dim=(-1, -2))

    # Per-example count of ordered pairs; handled elementwise so batched
    # masks do not hit an ambiguous tensor truth value.
    n = torch.sum(mask, dim=-1)
    n_pairs = n * (n - 1)
    mean_sq = torch.where(
        n > 1,
        total / torch.clamp(n_pairs, min=1.0),
        torch.zeros_like(total),
    )

    return torch.sqrt(mean_sq)
+ losses = {} + for loss_name, loss_fn in loss_fns.items(): + weight = self.config[loss_name].weight + loss = loss_fn() + if(torch.isnan(loss) or torch.isinf(loss)): + logging.warning(f"{loss_name} loss is NaN. Skipping...") + loss = loss.new_tensor(0., requires_grad=True) + cum_loss = cum_loss + weight * loss + losses[loss_name] = loss.detach().clone() + + losses["unscaled_loss"] = cum_loss.detach().clone() + + # Scale the loss by the square root of the minimum of the crop size and + # the (average) sequence length. See subsection 1.9. + seq_len = torch.mean(batch["seq_length"].float()) + crop_len = batch["aatype"].shape[-1] + cum_loss = cum_loss * torch.sqrt(min(seq_len, crop_len)) + + losses["loss"] = cum_loss.detach().clone() + + if(not _return_breakdown): + return cum_loss + + return cum_loss, losses diff --git a/openfold/utils/lr_schedulers.py b/openfold/utils/lr_schedulers.py new file mode 100644 index 0000000000000000000000000000000000000000..5d068be5ec826e7d1f8d49b3459b7cc66547f9f8 --- /dev/null +++ b/openfold/utils/lr_schedulers.py @@ -0,0 +1,107 @@ +import torch + + +class AlphaFoldLRScheduler(torch.optim.lr_scheduler._LRScheduler): + """ Implements the learning rate schedule defined in the AlphaFold 2 + supplement. A linear warmup is followed by a plateau at the maximum + learning rate and then exponential decay. + + Note that the initial learning rate of the optimizer in question is + ignored; use this class' base_lr parameter to specify the starting + point of the warmup. 
def get_lr(self):
    """
    Compute the learning rate for the current step (``self.last_epoch``).

    The schedule has three phases: a linear warmup from ``base_lr`` to
    ``max_lr`` over ``warmup_no_steps`` steps, a plateau at ``max_lr``, and,
    once the step exceeds ``start_decay_after_n_steps``, a stepwise
    exponential decay by ``decay_factor`` every ``decay_every_n_steps``
    steps.

    Returns:
        A list with one learning rate per optimizer parameter group
    Raises:
        RuntimeError: if called outside of the scheduler's step()
    """
    if not self._get_lr_called_within_step:
        raise RuntimeError(
            "To get the last learning rate computed by the scheduler, use "
            "get_last_lr()"
        )

    step_no = self.last_epoch

    # Strict comparison: with warmup_no_steps == 0 there is no warmup phase
    # at all, and the division below is never reached.
    if step_no < self.warmup_no_steps:
        progress = step_no / self.warmup_no_steps
        # Interpolate between base_lr and max_lr so that the warmup ends
        # exactly at max_lr, even for a nonzero base_lr.
        lr = self.base_lr + progress * (self.max_lr - self.base_lr)
    elif step_no > self.start_decay_after_n_steps:
        steps_since_decay = step_no - self.start_decay_after_n_steps
        exp = (steps_since_decay // self.decay_every_n_steps) + 1
        lr = self.max_lr * (self.decay_factor ** exp)
    else:  # plateau
        lr = self.max_lr

    return [lr for _ in self.optimizer.param_groups]
def is_fp16_enabled():
    """
    Report whether autocast is currently active with float16 on the GPU.

    Returns:
        True only if the autocast GPU dtype is torch.float16 and autocast is
        enabled; False otherwise
    """
    # Autocast world
    if torch.get_autocast_gpu_dtype() != torch.float16:
        return False

    return torch.is_autocast_enabled()
def rot_matmul(
    a: torch.Tensor,
    b: torch.Tensor
) -> torch.Tensor:
    """
    Multiplies two tensors of 3x3 rotation matrices. The nine entries are
    spelled out as explicit sums of products rather than calling a matmul
    kernel, to avoid AMP downcasting.

    Args:
        a: [*, 3, 3] left multiplicand
        b: [*, 3, 3] right multiplicand
    Returns:
        The product ab
    """
    def entry(i, j):
        # Dot product of row i of a with column j of b
        return (
            a[..., i, 0] * b[..., 0, j]
            + a[..., i, 1] * b[..., 1, j]
            + a[..., i, 2] * b[..., 2, j]
        )

    rows = [
        torch.stack([entry(i, j) for j in range(3)], dim=-1)
        for i in range(3)
    ]

    return torch.stack(rows, dim=-2)
def identity_quats(
    batch_dims: Tuple[int],
    dtype: Optional[torch.dtype] = None,
    device: Optional[torch.device] = None,
    requires_grad: bool = True,
) -> torch.Tensor:
    """
    Builds a tensor of identity quaternions (1, 0, 0, 0).

    Args:
        batch_dims: Leading dimensions of the output
        dtype: The torch dtype of the output
        device: The torch device of the output
        requires_grad: Whether the output should require gradients
    Returns:
        A [*batch_dims, 4] tensor whose first component is 1 and whose
        remaining components are 0
    """
    out_shape = tuple(batch_dims) + (4,)
    quat = torch.zeros(
        out_shape,
        dtype=dtype,
        device=device,
        requires_grad=requires_grad,
    )

    # The in-place write happens under no_grad because the tensor may be a
    # leaf that requires grad.
    with torch.no_grad():
        quat[..., 0].fill_(1)

    return quat
def quat_to_rot(quat: torch.Tensor) -> torch.Tensor:
    """
    Converts a quaternion to a rotation matrix.

    Each matrix entry is a linear combination of the pairwise products of
    the quaternion components; the coefficients come from the module-level
    _QTR_MAT table.

    Args:
        quat: [*, 4] quaternions
    Returns:
        [*, 3, 3] rotation matrices
    """
    # [*, 4, 4] outer product of each quaternion with itself
    outer = quat.unsqueeze(-1) * quat.unsqueeze(-2)

    # [4, 4, 3, 3] coefficient table, matching the input's dtype and device
    coeffs = outer.new_tensor(_QTR_MAT, requires_grad=False)

    # [1, ..., 1, 4, 4, 3, 3] so that the table broadcasts over batch dims
    n_batch_dims = outer.dim() - 2
    coeffs = coeffs.view((1,) * n_batch_dims + tuple(coeffs.shape))

    # [*, 4, 4, 3, 3]
    weighted = outer[..., None, None] * coeffs

    # [*, 3, 3]
    return weighted.sum(dim=(-3, -4))
def invert_quat(quat: torch.Tensor):
    """
    Inverts a quaternion: the conjugate divided by the squared norm.

    Args:
        quat: [*, 4] quaternions (not necessarily unit length)
    Returns:
        [*, 4] inverse quaternions. The input is left unmodified.
    """
    # Conjugate: keep the first component, negate the other three
    conjugate = torch.cat([quat[..., :1], quat[..., 1:] * -1], dim=-1)
    sq_norm = torch.sum(quat ** 2, dim=-1, keepdim=True)
    return conjugate / sq_norm
+ """ + def __init__(self, + rot_mats: Optional[torch.Tensor] = None, + quats: Optional[torch.Tensor] = None, + normalize_quats: bool = True, + ): + """ + Args: + rot_mats: + A [*, 3, 3] rotation matrix tensor. Mutually exclusive with + quats + quats: + A [*, 4] quaternion. Mutually exclusive with rot_mats. If + normalize_quats is not True, must be a unit quaternion + normalize_quats: + If quats is specified, whether to normalize quats + """ + if((rot_mats is None and quats is None) or + (rot_mats is not None and quats is not None)): + raise ValueError("Exactly one input argument must be specified") + + if((rot_mats is not None and rot_mats.shape[-2:] != (3, 3)) or + (quats is not None and quats.shape[-1] != 4)): + raise ValueError( + "Incorrectly shaped rotation matrix or quaternion" + ) + + # Force full-precision + if(quats is not None): + quats = quats.type(torch.float32) + if(rot_mats is not None): + rot_mats = rot_mats.type(torch.float32) + + if(quats is not None and normalize_quats): + quats = quats / torch.linalg.norm(quats, dim=-1, keepdim=True) + + self._rot_mats = rot_mats + self._quats = quats + + @staticmethod + def identity( + shape, + dtype: Optional[torch.dtype] = None, + device: Optional[torch.device] = None, + requires_grad: bool = True, + fmt: str = "quat", + ): + """ + Returns an identity Rotation. + + Args: + shape: + The "shape" of the resulting Rotation object. See documentation + for the shape property + dtype: + The torch dtype for the rotation + device: + The torch device for the new rotation + requires_grad: + Whether the underlying tensors in the new rotation object + should require gradient computation + fmt: + One of "quat" or "rot_mat". 
Determines the underlying format + of the new object's rotation + Returns: + A new identity rotation + """ + if(fmt == "rot_mat"): + rot_mats = identity_rot_mats( + shape, dtype, device, requires_grad, + ) + return Rotation(rot_mats=rot_mats, quats=None) + elif(fmt == "quat"): + quats = identity_quats(shape, dtype, device, requires_grad) + return Rotation(rot_mats=None, quats=quats, normalize_quats=False) + else: + raise ValueError(f"Invalid format: f{fmt}") + + # Magic methods + + def __getitem__(self, index: Any): + """ + Allows torch-style indexing over the virtual shape of the rotation + object. See documentation for the shape property. + + Args: + index: + A torch index. E.g. (1, 3, 2), or (slice(None,)) + Returns: + The indexed rotation + """ + if type(index) != tuple: + index = (index,) + + if(self._rot_mats is not None): + rot_mats = self._rot_mats[index + (slice(None), slice(None))] + return Rotation(rot_mats=rot_mats) + elif(self._quats is not None): + quats = self._quats[index + (slice(None),)] + return Rotation(quats=quats, normalize_quats=False) + else: + raise ValueError("Both rotations are None") + + def __mul__(self, + right: torch.Tensor, + ): + """ + Pointwise left multiplication of the rotation with a tensor. Can be + used to e.g. mask the Rotation. + + Args: + right: + The tensor multiplicand + Returns: + The product + """ + if not(isinstance(right, torch.Tensor)): + raise TypeError("The other multiplicand must be a Tensor") + + if(self._rot_mats is not None): + rot_mats = self._rot_mats * right[..., None, None] + return Rotation(rot_mats=rot_mats, quats=None) + elif(self._quats is not None): + quats = self._quats * right[..., None] + return Rotation(rot_mats=None, quats=quats, normalize_quats=False) + else: + raise ValueError("Both rotations are None") + + def __rmul__(self, + left: torch.Tensor, + ): + """ + Reverse pointwise multiplication of the rotation with a tensor. 
+ + Args: + left: + The left multiplicand + Returns: + The product + """ + return self.__mul__(left) + + # Properties + + @property + def shape(self) -> torch.Size: + """ + Returns the virtual shape of the rotation object. This shape is + defined as the batch dimensions of the underlying rotation matrix + or quaternion. If the Rotation was initialized with a [10, 3, 3] + rotation matrix tensor, for example, the resulting shape would be + [10]. + + Returns: + The virtual shape of the rotation object + """ + s = None + if(self._quats is not None): + s = self._quats.shape[:-1] + else: + s = self._rot_mats.shape[:-2] + + return s + + @property + def dtype(self) -> torch.dtype: + """ + Returns the dtype of the underlying rotation. + + Returns: + The dtype of the underlying rotation + """ + if(self._rot_mats is not None): + return self._rot_mats.dtype + elif(self._quats is not None): + return self._quats.dtype + else: + raise ValueError("Both rotations are None") + + @property + def device(self) -> torch.device: + """ + The device of the underlying rotation + + Returns: + The device of the underlying rotation + """ + if(self._rot_mats is not None): + return self._rot_mats.device + elif(self._quats is not None): + return self._quats.device + else: + raise ValueError("Both rotations are None") + + @property + def requires_grad(self) -> bool: + """ + Returns the requires_grad property of the underlying rotation + + Returns: + The requires_grad property of the underlying tensor + """ + if(self._rot_mats is not None): + return self._rot_mats.requires_grad + elif(self._quats is not None): + return self._quats.requires_grad + else: + raise ValueError("Both rotations are None") + + def get_rot_mats(self) -> torch.Tensor: + """ + Returns the underlying rotation as a rotation matrix tensor. 
+ + Returns: + The rotation as a rotation matrix tensor + """ + rot_mats = self._rot_mats + if(rot_mats is None): + if(self._quats is None): + raise ValueError("Both rotations are None") + else: + rot_mats = quat_to_rot(self._quats) + + return rot_mats + + def get_quats(self) -> torch.Tensor: + """ + Returns the underlying rotation as a quaternion tensor. + + Depending on whether the Rotation was initialized with a + quaternion, this function may call torch.linalg.eigh. + + Returns: + The rotation as a quaternion tensor. + """ + quats = self._quats + if(quats is None): + if(self._rot_mats is None): + raise ValueError("Both rotations are None") + else: + quats = rot_to_quat(self._rot_mats) + + return quats + + def get_cur_rot(self) -> torch.Tensor: + """ + Return the underlying rotation in its current form + + Returns: + The stored rotation + """ + if(self._rot_mats is not None): + return self._rot_mats + elif(self._quats is not None): + return self._quats + else: + raise ValueError("Both rotations are None") + + def get_rotvec(self, eps=1e-6) -> torch.Tensor: + """ + Return the underlying axis-angle rotation vector. + + Follow's scipy's implementation: + https://github.com/scipy/scipy/blob/HEAD/scipy/spatial/transform/_rotation.pyx#L1385-L1402 + + Returns: + The stored rotation as a axis-angle vector. 
def compose_q_update_vec(self,
    q_update_vec: torch.Tensor,
    normalize_quats: bool = True,
    update_mask: torch.Tensor = None,
):
    """
    Returns a new quaternion Rotation obtained by applying a quaternion
    update to the current rotation. The update is given as a [*, 3] tensor
    of x, y, z values such that (1, x, y, z) is the desired (not necessarily
    unit) quaternion update.

    Args:
        q_update_vec:
            A [*, 3] quaternion update tensor
        normalize_quats:
            Whether to normalize the output quaternion
        update_mask:
            Optional tensor multiplied into the quaternion increment before
            it is added to the current quaternion (presumably broadcastable
            against [*, 4] -- TODO confirm with callers)
    Returns:
        An updated Rotation
    """
    current = self.get_quats()
    increment = quat_multiply_by_vec(current, q_update_vec)
    increment = (
        increment if update_mask is None else increment * update_mask
    )

    return Rotation(
        rot_mats=None,
        quats=current + increment,
        normalize_quats=normalize_quats,
    )
+ + Args: + r: + An update rotation object + Returns: + An updated rotation object + """ + r1 = self.get_rot_mats() + r2 = r.get_rot_mats() + new_rot_mats = rot_matmul(r1, r2) + return Rotation(rot_mats=new_rot_mats, quats=None) + + def compose_q(self, r, normalize_quats: bool = True): + """ + Compose the quaternions of the current Rotation object with those + of another. + + Depending on whether either Rotation was initialized with + quaternions, this function may call torch.linalg.eigh. + + Args: + r: + An update rotation object + Returns: + An updated rotation object + """ + q1 = self.get_quats() + q2 = r.get_quats() + new_quats = quat_multiply(q1, q2) + return Rotation( + rot_mats=None, quats=new_quats, normalize_quats=normalize_quats + ) + + def apply(self, pts: torch.Tensor) -> torch.Tensor: + """ + Apply the current Rotation as a rotation matrix to a set of 3D + coordinates. + + Args: + pts: + A [*, 3] set of points + Returns: + [*, 3] rotated points + """ + rot_mats = self.get_rot_mats() + return rot_vec_mul(rot_mats, pts) + + def invert_apply(self, pts: torch.Tensor) -> torch.Tensor: + """ + The inverse of the apply() method. + + Args: + pts: + A [*, 3] set of points + Returns: + [*, 3] inverse-rotated points + """ + rot_mats = self.get_rot_mats() + inv_rot_mats = invert_rot_mat(rot_mats) + return rot_vec_mul(inv_rot_mats, pts) + + def invert(self) : + """ + Returns the inverse of the current Rotation. + + Returns: + The inverse of the current Rotation + """ + if(self._rot_mats is not None): + return Rotation( + rot_mats=invert_rot_mat(self._rot_mats), + quats=None + ) + elif(self._quats is not None): + return Rotation( + rot_mats=None, + quats=invert_quat(self._quats), + normalize_quats=False, + ) + else: + raise ValueError("Both rotations are None") + + # "Tensor" stuff + + def unsqueeze(self, + dim: int, + ): + """ + Analogous to torch.unsqueeze. The dimension is relative to the + shape of the Rotation object. 
+ + Args: + dim: A positive or negative dimension index. + Returns: + The unsqueezed Rotation. + """ + if dim >= len(self.shape): + raise ValueError("Invalid dimension") + + if(self._rot_mats is not None): + rot_mats = self._rot_mats.unsqueeze(dim if dim >= 0 else dim - 2) + return Rotation(rot_mats=rot_mats, quats=None) + elif(self._quats is not None): + quats = self._quats.unsqueeze(dim if dim >= 0 else dim - 1) + return Rotation(rot_mats=None, quats=quats, normalize_quats=False) + else: + raise ValueError("Both rotations are None") + + @staticmethod + def cat( + rs, + dim: int, + ): + """ + Concatenates rotations along one of the batch dimensions. Analogous + to torch.cat(). + + Note that the output of this operation is always a rotation matrix, + regardless of the format of input rotations. + + Args: + rs: + A list of rotation objects + dim: + The dimension along which the rotations should be + concatenated + Returns: + A concatenated Rotation object in rotation matrix format + """ + rot_mats = [r.get_rot_mats() for r in rs] + rot_mats = torch.cat(rot_mats, dim=dim if dim >= 0 else dim - 2) + + return Rotation(rot_mats=rot_mats, quats=None) + + def map_tensor_fn(self, + fn + ): + """ + Apply a Tensor -> Tensor function to underlying rotation tensors, + mapping over the rotation dimension(s). Can be used e.g. to sum out + a one-hot batch dimension. 
+ + Args: + fn: + A Tensor -> Tensor function to be mapped over the Rotation + Returns: + The transformed Rotation object + """ + if(self._rot_mats is not None): + rot_mats = self._rot_mats.view(self._rot_mats.shape[:-2] + (9,)) + rot_mats = torch.stack( + list(map(fn, torch.unbind(rot_mats, dim=-1))), dim=-1 + ) + rot_mats = rot_mats.view(rot_mats.shape[:-1] + (3, 3)) + return Rotation(rot_mats=rot_mats, quats=None) + elif(self._quats is not None): + quats = torch.stack( + list(map(fn, torch.unbind(self._quats, dim=-1))), dim=-1 + ) + return Rotation(rot_mats=None, quats=quats, normalize_quats=False) + else: + raise ValueError("Both rotations are None") + + def cuda(self): + """ + Analogous to the cuda() method of torch Tensors + + Returns: + A copy of the Rotation in CUDA memory + """ + if(self._rot_mats is not None): + return Rotation(rot_mats=self._rot_mats.cuda(), quats=None) + elif(self._quats is not None): + return Rotation( + rot_mats=None, + quats=self._quats.cuda(), + normalize_quats=False + ) + else: + raise ValueError("Both rotations are None") + + def to(self, + device: Optional[torch.device], + dtype: Optional[torch.dtype] + ): + """ + Analogous to the to() method of torch Tensors + + Args: + device: + A torch device + dtype: + A torch dtype + Returns: + A copy of the Rotation using the new device and dtype + """ + if(self._rot_mats is not None): + return Rotation( + rot_mats=self._rot_mats.to(device=device, dtype=dtype), + quats=None, + ) + elif(self._quats is not None): + return Rotation( + rot_mats=None, + quats=self._quats.to(device=device, dtype=dtype), + normalize_quats=False, + ) + else: + raise ValueError("Both rotations are None") + + def detach(self): + """ + Returns a copy of the Rotation whose underlying Tensor has been + detached from its torch graph. 
+ + Returns: + A copy of the Rotation whose underlying Tensor has been detached + from its torch graph + """ + if(self._rot_mats is not None): + return Rotation(rot_mats=self._rot_mats.detach(), quats=None) + elif(self._quats is not None): + return Rotation( + rot_mats=None, + quats=self._quats.detach(), + normalize_quats=False, + ) + else: + raise ValueError("Both rotations are None") + + +class Rigid: + """ + A class representing a rigid transformation. Little more than a wrapper + around two objects: a Rotation object and a [*, 3] translation + Designed to behave approximately like a single torch tensor with the + shape of the shared batch dimensions of its component parts. + """ + def __init__(self, + rots: Optional[Rotation], + trans: Optional[torch.Tensor], + ): + """ + Args: + rots: A [*, 3, 3] rotation tensor + trans: A corresponding [*, 3] translation tensor + """ + # (we need device, dtype, etc. from at least one input) + + batch_dims, dtype, device, requires_grad = None, None, None, None + if(trans is not None): + batch_dims = trans.shape[:-1] + dtype = trans.dtype + device = trans.device + requires_grad = trans.requires_grad + elif(rots is not None): + batch_dims = rots.shape + dtype = rots.dtype + device = rots.device + requires_grad = rots.requires_grad + else: + raise ValueError("At least one input argument must be specified") + + if(rots is None): + rots = Rotation.identity( + batch_dims, dtype, device, requires_grad, + ) + elif(trans is None): + trans = identity_trans( + batch_dims, dtype, device, requires_grad, + ) + + if((rots.shape != trans.shape[:-1]) or + (rots.device != trans.device)): + raise ValueError("Rots and trans incompatible") + + # Force full precision. Happens to the rotations automatically. 
+ trans = trans.type(torch.float32) + + self._rots = rots + self._trans = trans + + @staticmethod + def identity( + shape: Tuple[int], + dtype: Optional[torch.dtype] = None, + device: Optional[torch.device] = None, + requires_grad: bool = True, + fmt: str = "quat", + ): + """ + Constructs an identity transformation. + + Args: + shape: + The desired shape + dtype: + The dtype of both internal tensors + device: + The device of both internal tensors + requires_grad: + Whether grad should be enabled for the internal tensors + Returns: + The identity transformation + """ + return Rigid( + Rotation.identity(shape, dtype, device, requires_grad, fmt=fmt), + identity_trans(shape, dtype, device, requires_grad), + ) + + def __getitem__(self, + index: Any, + ): + """ + Indexes the affine transformation with PyTorch-style indices. + The index is applied to the shared dimensions of both the rotation + and the translation. + + E.g.:: + + r = Rotation(rot_mats=torch.rand(10, 10, 3, 3), quats=None) + t = Rigid(r, torch.rand(10, 10, 3)) + indexed = t[3, 4:6] + assert(indexed.shape == (2,)) + assert(indexed.get_rots().shape == (2,)) + assert(indexed.get_trans().shape == (2, 3)) + + Args: + index: A standard torch tensor index. E.g. 8, (10, None, 3), + or (3, slice(0, 1, None)) + Returns: + The indexed tensor + """ + if type(index) != tuple: + index = (index,) + + return Rigid( + self._rots[index], + self._trans[index + (slice(None),)], + ) + + def __mul__(self, + right: torch.Tensor, + ): + """ + Pointwise left multiplication of the transformation with a tensor. + Can be used to e.g. mask the Rigid. 
+ + Args: + right: + The tensor multiplicand + Returns: + The product + """ + if not(isinstance(right, torch.Tensor)): + raise TypeError("The other multiplicand must be a Tensor") + + new_rots = self._rots * right + new_trans = self._trans * right[..., None] + + return Rigid(new_rots, new_trans) + + def __rmul__(self, + left: torch.Tensor, + ): + """ + Reverse pointwise multiplication of the transformation with a + tensor. + + Args: + left: + The left multiplicand + Returns: + The product + """ + return self.__mul__(left) + + @property + def shape(self) -> torch.Size: + """ + Returns the shape of the shared dimensions of the rotation and + the translation. + + Returns: + The shape of the transformation + """ + s = self._trans.shape[:-1] + return s + + @property + def device(self) -> torch.device: + """ + Returns the device on which the Rigid's tensors are located. + + Returns: + The device on which the Rigid's tensors are located + """ + return self._trans.device + + def get_rots(self) -> Rotation: + """ + Getter for the rotation. + + Returns: + The rotation object + """ + return self._rots + + def get_trans(self) -> torch.Tensor: + """ + Getter for the translation. + + Returns: + The stored translation + """ + return self._trans + + def compose_q_update_vec(self, + q_update_vec: torch.Tensor, + update_mask: torch.Tensor=None, + ): + """ + Composes the transformation with a quaternion update vector of + shape [*, 6], where the final 6 columns represent the x, y, and + z values of a quaternion of form (1, x, y, z) followed by a 3D + translation. + + Args: + q_vec: The quaternion update vector. + Returns: + The composed transformation. 
def compose_tran_update_vec(self,
    t_vec: torch.Tensor,
    update_mask: torch.Tensor=None,
):
    """
    Composes the transformation with a translation-only update. The update
    vector is first rotated by the current rotation and then added to the
    current translation; the rotation itself is reused unchanged.

    Args:
        t_vec:
            A [*, 3] translation update
        update_mask:
            Optional tensor multiplied into the rotated update before it is
            added (presumably broadcastable against [*, 3] -- TODO confirm
            with callers)
    Returns:
        The composed transformation.
    """
    shift = self._rots.apply(t_vec)
    shift = shift if update_mask is None else shift * update_mask

    return Rigid(self._rots, self._trans + shift)
def to_tensor_4x4(self) -> torch.Tensor:
    """
    Converts a transformation to a homogenous transformation tensor: the
    rotation matrix in the upper-left 3x3 block, the translation in the
    last column, and 1 in the bottom-right corner.

    Returns:
        A [*, 4, 4] homogenous transformation tensor
    """
    homog = self._trans.new_zeros(tuple(self.shape) + (4, 4))

    # The three writes touch disjoint regions of the output
    homog[..., 3, 3] = 1
    homog[..., :3, 3] = self._trans
    homog[..., :3, :3] = self._rots.get_rot_mats()

    return homog
+ + Args: + t: [*, 4, 4] homogenous transformation tensor + Returns: + T object with shape [*] + """ + if(t.shape[-2:] != (4, 4)): + raise ValueError("Incorrectly shaped input tensor") + + rots = Rotation(rot_mats=t[..., :3, :3], quats=None) + trans = t[..., :3, 3] + + return Rigid(rots, trans) + + def to_tensor_7(self) -> torch.Tensor: + """ + Converts a transformation to a tensor with 7 final columns, four + for the quaternion followed by three for the translation. + + Returns: + A [*, 7] tensor representation of the transformation + """ + tensor = self._trans.new_zeros((*self.shape, 7)) + tensor[..., :4] = self._rots.get_quats() + tensor[..., 4:] = self._trans + + return tensor + + @staticmethod + def from_tensor_7( + t: torch.Tensor, + normalize_quats: bool = False, + ): + if(t.shape[-1] != 7): + raise ValueError("Incorrectly shaped input tensor") + + quats, trans = t[..., :4], t[..., 4:] + + rots = Rotation( + rot_mats=None, + quats=quats, + normalize_quats=normalize_quats + ) + + return Rigid(rots, trans) + + @staticmethod + def from_3_points( + p_neg_x_axis: torch.Tensor, + origin: torch.Tensor, + p_xy_plane: torch.Tensor, + eps: float = 1e-8 + ): + """ + Implements algorithm 21. Constructs transformations from sets of 3 + points using the Gram-Schmidt algorithm. 
+ + Args: + p_neg_x_axis: [*, 3] coordinates + origin: [*, 3] coordinates used as frame origins + p_xy_plane: [*, 3] coordinates + eps: Small epsilon value + Returns: + A transformation object of shape [*] + """ + p_neg_x_axis = torch.unbind(p_neg_x_axis, dim=-1) + origin = torch.unbind(origin, dim=-1) + p_xy_plane = torch.unbind(p_xy_plane, dim=-1) + + e0 = [c1 - c2 for c1, c2 in zip(origin, p_neg_x_axis)] + e1 = [c1 - c2 for c1, c2 in zip(p_xy_plane, origin)] + + denom = torch.sqrt(sum((c * c for c in e0)) + eps) + e0 = [c / denom for c in e0] + dot = sum((c1 * c2 for c1, c2 in zip(e0, e1))) + e1 = [c2 - c1 * dot for c1, c2 in zip(e0, e1)] + denom = torch.sqrt(sum((c * c for c in e1)) + eps) + e1 = [c / denom for c in e1] + e2 = [ + e0[1] * e1[2] - e0[2] * e1[1], + e0[2] * e1[0] - e0[0] * e1[2], + e0[0] * e1[1] - e0[1] * e1[0], + ] + + rots = torch.stack([c for tup in zip(e0, e1, e2) for c in tup], dim=-1) + rots = rots.reshape(rots.shape[:-1] + (3, 3)) + + rot_obj = Rotation(rot_mats=rots, quats=None) + + return Rigid(rot_obj, torch.stack(origin, dim=-1)) + + def unsqueeze(self, + dim: int, + ): + """ + Analogous to torch.unsqueeze. The dimension is relative to the + shared dimensions of the rotation/translation. + + Args: + dim: A positive or negative dimension index. + Returns: + The unsqueezed transformation. + """ + if dim >= len(self.shape): + raise ValueError("Invalid dimension") + rots = self._rots.unsqueeze(dim) + trans = self._trans.unsqueeze(dim if dim >= 0 else dim - 1) + + return Rigid(rots, trans) + + @staticmethod + def cat( + ts, + dim: int, + ): + """ + Concatenates transformations along a new dimension. 
+ + Args: + ts: + A list of T objects + dim: + The dimension along which the transformations should be + concatenated + Returns: + A concatenated transformation object + """ + rots = Rotation.cat([t._rots for t in ts], dim) + trans = torch.cat( + [t._trans for t in ts], dim=dim if dim >= 0 else dim - 1 + ) + + return Rigid(rots, trans) + + def apply_rot_fn(self, fn): + """ + Applies a Rotation -> Rotation function to the stored rotation + object. + + Args: + fn: A function of type Rotation -> Rotation + Returns: + A transformation object with a transformed rotation. + """ + return Rigid(fn(self._rots), self._trans) + + def apply_trans_fn(self, fn): + """ + Applies a Tensor -> Tensor function to the stored translation. + + Args: + fn: + A function of type Tensor -> Tensor to be applied to the + translation + Returns: + A transformation object with a transformed translation. + """ + return Rigid(self._rots, fn(self._trans)) + + def scale_translation(self, trans_scale_factor: float): + """ + Scales the translation by a constant factor. + + Args: + trans_scale_factor: + The constant factor + Returns: + A transformation object with a scaled translation. + """ + fn = lambda t: t * trans_scale_factor + return self.apply_trans_fn(fn) + + def stop_rot_gradient(self): + """ + Detaches the underlying rotation object + + Returns: + A transformation object with detached rotations + """ + fn = lambda r: r.detach() + return self.apply_rot_fn(fn) + + @staticmethod + def make_transform_from_reference(n_xyz, ca_xyz, c_xyz, eps=1e-20): + """ + Returns a transformation object from reference coordinates. + + Note that this method does not take care of symmetries. If you + provide the atom positions in the non-standard way, the N atom will + end up not at [-0.527250, 1.359329, 0.0] but instead at + [-0.527250, -1.359329, 0.0]. You need to take care of such cases in + your code. + + Args: + n_xyz: A [*, 3] tensor of nitrogen xyz coordinates. 
+ ca_xyz: A [*, 3] tensor of carbon alpha xyz coordinates. + c_xyz: A [*, 3] tensor of carbon xyz coordinates. + Returns: + A transformation object. After applying the translation and + rotation to the reference backbone, the coordinates will + approximately equal to the input coordinates. + """ + translation = -1 * ca_xyz + n_xyz = n_xyz + translation + c_xyz = c_xyz + translation + + c_x, c_y, c_z = [c_xyz[..., i] for i in range(3)] + norm = torch.sqrt(eps + c_x ** 2 + c_y ** 2) + sin_c1 = -c_y / norm + cos_c1 = c_x / norm + zeros = sin_c1.new_zeros(sin_c1.shape) + ones = sin_c1.new_ones(sin_c1.shape) + + c1_rots = sin_c1.new_zeros((*sin_c1.shape, 3, 3)) + c1_rots[..., 0, 0] = cos_c1 + c1_rots[..., 0, 1] = -1 * sin_c1 + c1_rots[..., 1, 0] = sin_c1 + c1_rots[..., 1, 1] = cos_c1 + c1_rots[..., 2, 2] = 1 + + norm = torch.sqrt(eps + c_x ** 2 + c_y ** 2 + c_z ** 2) + sin_c2 = c_z / norm + cos_c2 = torch.sqrt(c_x ** 2 + c_y ** 2) / norm + + c2_rots = sin_c2.new_zeros((*sin_c2.shape, 3, 3)) + c2_rots[..., 0, 0] = cos_c2 + c2_rots[..., 0, 2] = sin_c2 + c2_rots[..., 1, 1] = 1 + c1_rots[..., 2, 0] = -1 * sin_c2 + c1_rots[..., 2, 2] = cos_c2 + + c_rots = rot_matmul(c2_rots, c1_rots) + n_xyz = rot_vec_mul(c_rots, n_xyz) + + _, n_y, n_z = [n_xyz[..., i] for i in range(3)] + norm = torch.sqrt(eps + n_y ** 2 + n_z ** 2) + sin_n = -n_z / norm + cos_n = n_y / norm + + n_rots = sin_c2.new_zeros((*sin_c2.shape, 3, 3)) + n_rots[..., 0, 0] = 1 + n_rots[..., 1, 1] = cos_n + n_rots[..., 1, 2] = -1 * sin_n + n_rots[..., 2, 1] = sin_n + n_rots[..., 2, 2] = cos_n + + rots = rot_matmul(n_rots, c_rots) + + rots = rots.transpose(-1, -2) + translation = -1 * translation + + rot_obj = Rotation(rot_mats=rots, quats=None) + + return Rigid(rot_obj, translation) + + def cuda(self): + """ + Moves the transformation object to GPU memory + + Returns: + A version of the transformation on GPU + """ + return Rigid(self._rots.cuda(), self._trans.cuda()) diff --git a/openfold/utils/seed.py 
# b/openfold/utils/seed.py new file mode 100644 index 0000000000000000000000000000000000000000..b45b81379257909e06b00839bbb90848cf3d0b3f --- /dev/null +++ b/openfold/utils/seed.py @@ -0,0 +1,19 @@
import os
import logging
import random
import numpy as np
from pytorch_lightning.utilities.seed import seed_everything

from openfold.utils.suppress_output import SuppressLogging


def seed_globally(seed=None):
    """
    Publishes a global seed through the PL_GLOBAL_SEED environment variable
    and then calls seed_everything.

    Args:
        seed:
            Seed to publish. Only used when PL_GLOBAL_SEED is not already
            set; if it is None in that case, a random value in
            [0, max(uint32)] is drawn.
    """
    if "PL_GLOBAL_SEED" not in os.environ:
        if seed is None:
            seed = random.randint(0, np.iinfo(np.uint32).max)
        os.environ["PL_GLOBAL_SEED"] = str(seed)
        logging.info(f'os.environ["PL_GLOBAL_SEED"] set to {seed}')

    # seed_everything is a bit log-happy
    # NOTE(review): seed=None is passed on purpose; presumably
    # seed_everything then reads PL_GLOBAL_SEED -- confirm against the
    # installed pytorch_lightning version.
    with SuppressLogging(logging.INFO):
        seed_everything(seed=None)


# diff --git a/openfold/utils/superimposition.py b/openfold/utils/superimposition.py new file mode 100644 index 0000000000000000000000000000000000000000..835498f014b8083b3f715bfb3b8dd426ce296f3e --- /dev/null +++ b/openfold/utils/superimposition.py @@ -0,0 +1,107 @@
# Copyright 2021 AlQuraishi Laboratory
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from Bio.SVDSuperimposer import SVDSuperimposer
import numpy as np
import torch


def _superimpose_np(reference, coords):
    """
    Superimposes coordinates onto a reference by minimizing RMSD using SVD.

    Args:
        reference:
            [N, 3] reference array
        coords:
            [N, 3] array
    Returns:
        The SVDSuperimposer object after set() and run() have been called;
        callers query it for the transformed coordinates, RMSD and
        rotation/translation.
    """
    sup = SVDSuperimposer()
    sup.set(reference, coords)
    sup.run()
    return sup


def _superimpose_single(reference, coords):
    """
    Superimposes one set of coordinates onto one reference.

    Args:
        reference:
            [N, 3] reference tensor
        coords:
            [N, 3] tensor
    Returns:
        A tuple (superimposed, rmsd, rot, tran). The first two are tensors
        created with coords.new_tensor; rot and tran are whatever
        SVDSuperimposer.get_rotran() returns.
    """
    # The superimposer works on NumPy arrays, so gradients are dropped here.
    reference_np = reference.detach().cpu().numpy()
    coords_np = coords.detach().cpu().numpy()
    sup = _superimpose_np(reference_np, coords_np)
    rot, tran = sup.get_rotran()
    superimposed, rmsd = sup.get_transformed(), sup.get_rms()
    return coords.new_tensor(superimposed), coords.new_tensor(rmsd), rot, tran


def superimpose(reference, coords, mask, return_transform=False):
    """
    Superimposes coordinates onto a reference by minimizing RMSD using SVD.

    Args:
        reference:
            [*, N, 3] reference tensor
        coords:
            [*, N, 3] tensor
        mask:
            [*, N] tensor; positions with mask > 0 take part in the fit
        return_transform:
            If True, the per-item rotations and translations are returned
            as well
    Returns:
        A tuple of [*, N, 3] superimposed coords and [*] final RMSDs.
        Masked-out positions of the superimposed coords are zero. With
        return_transform=True the tuple additionally holds the stacked
        rotations and translations; these keep a single flattened batch
        dimension.
    """
    def select_unmasked_coords(coords, mask):
        return torch.masked_select(
            coords,
            (mask > 0.)[..., None],
        ).reshape(-1, 3)

    batch_dims = reference.shape[:-2]
    flat_reference = reference.reshape((-1,) + reference.shape[-2:])
    flat_coords = coords.reshape((-1,) + reference.shape[-2:])
    flat_mask = mask.reshape((-1,) + mask.shape[-1:])
    superimposed_list = []
    rmsds = []
    rots = []
    trans = []
    for r, c, m in zip(flat_reference, flat_coords, flat_mask):
        r_unmasked_coords = select_unmasked_coords(r, m)
        c_unmasked_coords = select_unmasked_coords(c, m)
        superimposed, rmsd, rot, tran = _superimpose_single(
            r_unmasked_coords,
            c_unmasked_coords
        )
        rots.append(rot)
        trans.append(tran)

        # Invert the masking in one indexed assignment. The same `m > 0`
        # test as in select_unmasked_coords is used so that the number of
        # target rows always equals the number of superimposed rows.
        superimposed_full_size = torch.zeros_like(r)
        superimposed_full_size[m > 0.] = superimposed.to(
            device=r.device, dtype=r.dtype
        )

        superimposed_list.append(superimposed_full_size)
        rmsds.append(rmsd)

    superimposed_stacked = torch.stack(superimposed_list, dim=0)
    rmsds_stacked = torch.stack(rmsds, dim=0)
    rots_stacked = torch.tensor(np.stack(rots, axis=0), device=coords.device)
    trans_stacked = torch.tensor(np.stack(trans, axis=0), device=coords.device)

    superimposed_reshaped = superimposed_stacked.reshape(
        batch_dims + coords.shape[-2:]
    )
    rmsds_reshaped = rmsds_stacked.reshape(
        batch_dims
    )
    if return_transform:
        return superimposed_reshaped, rmsds_reshaped, rots_stacked, trans_stacked
    return superimposed_reshaped, rmsds_reshaped


# diff --git a/openfold/utils/suppress_output.py b/openfold/utils/suppress_output.py new file mode 100644 index 0000000000000000000000000000000000000000..d7c821b36a6b8d047e541a51faceef037f48ff90 --- /dev/null +++ b/openfold/utils/suppress_output.py @@ -0,0 +1,26 @@
import logging
import os
import sys


class SuppressStdout:
    """Context manager that redirects sys.stdout to the null device."""

    def __enter__(self):
        self.stdout = sys.stdout
        # os.devnull instead of a hard-coded "/dev/null" so the redirect
        # also works on platforms without that path.
        self._dev_null = open(os.devnull, "w")
        sys.stdout = self._dev_null

    def __exit__(self, typ, value, traceback):
        sys.stdout = self.stdout
        # Close the handle opened in __enter__, not whatever sys.stdout
        # happened to be when the block ended.
        self._dev_null.close()


class SuppressLogging:
    """Context manager that disables logging at and below a given level."""

    def __init__(self, level):
        self.level = level

    def __enter__(self):
        # Remember the level that was disabled before, so that nested or
        # pre-existing logging.disable() settings survive this block.
        self._previous_level = logging.root.manager.disable
        logging.disable(self.level)

    def __exit__(self, typ, value, traceback):
        logging.disable(self._previous_level)


# diff --git a/openfold/utils/tensor_utils.py b/openfold/utils/tensor_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7e5e8e4b6b5e9a26ccf1c5cedf1ff10102e75b2f --- /dev/null +++ b/openfold/utils/tensor_utils.py @@ -0,0 +1,408 @@
# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this
# file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import partial
import torch
import torch.nn as nn
from typing import Tuple, List, Callable, Any, Dict, Sequence, Optional


def permute_final_dims(tensor: torch.Tensor, inds: List[int]):
    """
    Permutes the last len(inds) dimensions of a tensor.

    Args:
        tensor: The tensor to permute
        inds:
            The new order of the final dimensions, given as indices
            relative to the first of those final dimensions
    Returns:
        The permuted tensor; leading dimensions keep their order
    """
    zero_index = -1 * len(inds)
    first_inds = list(range(len(tensor.shape[:zero_index])))
    return tensor.permute(first_inds + [zero_index + i for i in inds])


def flatten_final_dims(t: torch.Tensor, no_dims: int):
    """Reshapes t so that its last no_dims dimensions become a single one."""
    return t.reshape(t.shape[:-no_dims] + (-1,))


def masked_mean(mask, value, dim, eps=1e-4):
    """
    Computes sum(mask * value) / (eps + sum(mask)) over dim.

    Args:
        mask: Tensor that can be expanded to value's shape
        value: Tensor of values
        dim: Dimension(s) to reduce over
        eps: Added to the denominator so an all-zero mask does not divide
            by zero
    """
    mask = mask.expand(*value.shape)
    return torch.sum(mask * value, dim=dim) / (eps + torch.sum(mask, dim=dim))


def pts_to_distogram(pts, min_bin=2.3125, max_bin=21.6875, no_bins=64):
    """
    Buckets all pairwise distances between points.

    Args:
        pts: [*, N, 3] coordinates (the last dimension is summed over)
        min_bin: First bin boundary
        max_bin: Last bin boundary
        no_bins: Number of bins; no_bins - 1 boundaries are generated
    Returns:
        [*, N, N] tensor of bin indices as produced by torch.bucketize
    """
    boundaries = torch.linspace(
        min_bin, max_bin, no_bins - 1, device=pts.device
    )
    dists = torch.sqrt(
        torch.sum((pts.unsqueeze(-2) - pts.unsqueeze(-3)) ** 2, dim=-1)
    )
    return torch.bucketize(dists, boundaries)


def dict_multimap(fn, dicts):
    """
    Applies fn to the lists of values that share a key across dicts.

    Args:
        fn: Function taking a list with one value per input dict
        dicts:
            Non-empty list of (possibly nested) dicts. The keys of the
            first dict determine the keys of the result.
    Returns:
        A dict with the same structure as dicts[0]
    """
    first = dicts[0]
    new_dict = {}
    for k, v in first.items():
        all_v = [d[k] for d in dicts]
        if type(v) is dict:
            new_dict[k] = dict_multimap(fn, all_v)
        else:
            new_dict[k] = fn(all_v)

    return new_dict


def one_hot(x, v_bins):
    """
    One-hot encodes each element of x by its nearest value in v_bins.

    Args:
        x: Tensor of values
        v_bins: 1-D tensor of bin values
    Returns:
        Float tensor of shape x.shape + (len(v_bins),)
    """
    reshaped_bins = v_bins.view(((1,) * len(x.shape)) + (len(v_bins),))
    diffs = x[..., None] - reshaped_bins
    am = torch.argmin(torch.abs(diffs), dim=-1)
    return nn.functional.one_hot(am, num_classes=len(v_bins)).float()


def batched_gather(data, inds, dim=0, no_batch_dims=0):
    """
    Indexes data with inds along dim, separately for every batch element.

    Args:
        data: Tensor to gather from
        inds:
            Integer index tensor whose leading no_batch_dims dimensions are
            the batch dimensions
        dim: Dimension of data that inds indexes into
        no_batch_dims: Number of leading batch dimensions of data
    Returns:
        The gathered tensor
    """
    ranges = []
    for i, s in enumerate(data.shape[:no_batch_dims]):
        # One arange per batch dimension, shaped to broadcast against inds
        r = torch.arange(s, device=inds.device)
        r = r.view(*(*((1,) * i), -1, *((1,) * (len(inds.shape) - i - 1))))
        ranges.append(r)

    remaining_dims = [
        slice(None) for _ in range(len(data.shape) - no_batch_dims)
    ]
    remaining_dims[dim - no_batch_dims if dim >= 0 else dim] = inds
    ranges.extend(remaining_dims)
    # Index with a tuple: using a list for multidimensional indexing is
    # deprecated.
    return data[tuple(ranges)]


# With tree_map, a poor man's JAX tree_map
def dict_map(fn, dic, leaf_type):
    """Applies tree_map to every value of dic and returns a new dict."""
    new_dict = {}
    for k, v in dic.items():
        if type(v) is dict:
            new_dict[k] = dict_map(fn, v, leaf_type)
        else:
            new_dict[k] = tree_map(fn, v, leaf_type)

    return new_dict


def tree_map(fn, tree, leaf_type):
    """
    Applies fn to every leaf of a nested structure.

    Args:
        fn: Function applied to each leaf
        tree: Arbitrarily nested dicts, lists and tuples with leaves of
            type leaf_type
        leaf_type: Type (or tuple of types) treated as a leaf
    Returns:
        A structure of the same shape with fn applied to its leaves
    Raises:
        ValueError: If a node is neither a dict, list, tuple nor leaf_type
    """
    if isinstance(tree, dict):
        return dict_map(fn, tree, leaf_type)
    elif isinstance(tree, list):
        return [tree_map(fn, x, leaf_type) for x in tree]
    elif isinstance(tree, tuple):
        return tuple([tree_map(fn, x, leaf_type) for x in tree])
    elif isinstance(tree, leaf_type):
        return fn(tree)
    else:
        # Report the offending type in the exception rather than on stdout
        raise ValueError(f"Not supported: {type(tree)}")


tensor_tree_map = partial(tree_map, leaf_type=torch.Tensor)


def _fetch_dims(tree):
    """
    Collects the shapes of all tensors in a nested dict/list/tuple.

    isinstance checks are used so that the same structures accepted by
    tensor_tree_map (including subclasses) are accepted here.

    Raises:
        ValueError: If a node is neither a dict, list, tuple nor tensor
    """
    shapes = []
    if isinstance(tree, dict):
        for v in tree.values():
            shapes.extend(_fetch_dims(v))
    elif isinstance(tree, (list, tuple)):
        for t in tree:
            shapes.extend(_fetch_dims(t))
    elif isinstance(tree, torch.Tensor):
        shapes.append(tree.shape)
    else:
        raise ValueError("Not supported")

    return shapes


@torch.jit.ignore
def _flat_idx_to_idx(
    flat_idx: int,
    dims: Tuple[int, ...],
) -> Tuple[int, ...]:
    """Converts a flat (row-major) index into one index per entry of dims."""
    idx = []
    for d in reversed(dims):
        idx.append(flat_idx % d)
        flat_idx = flat_idx // d

    return tuple(reversed(idx))


@torch.jit.ignore
def _get_minimal_slice_set(
    start: Sequence[int],
    end: Sequence[int],
    dims: Sequence[int],
    start_edges: Optional[Sequence[bool]] = None,
    end_edges: Optional[Sequence[bool]] = None,
) -> Sequence[Tuple[slice, ...]]:
    """
        Produces an ordered sequence of tensor slices that, when used in
        sequence on a tensor with shape dims, yields tensors that contain every
        leaf in the contiguous range [start, end]. Care is taken to yield a
        short sequence of slices, and perhaps even the shortest possible (I'm
        pretty sure it's the latter).

        end is INCLUSIVE.

        Args:
            start: Multi-dimensional index of the first element
            end: Multi-dimensional index of the last element (inclusive)
            dims: Sizes of the dimensions being indexed
            start_edges, end_edges:
                Precomputed edge flags, used by the recursive calls. When
                None they are derived from start/end/dims.
        Returns:
            A list of tuples of slices
    """
    # start_edges and end_edges both indicate whether, starting from any given
    # dimension, the start/end index is at the top/bottom edge of the
    # corresponding tensor, modeled as a tree
    def reduce_edge_list(l):
        # In-place suffix product: afterwards l[i] is truthy only if every
        # entry from i to the end was truthy.
        tally = 1
        for i in range(len(l)):
            reversed_idx = -1 * (i + 1)
            l[reversed_idx] *= tally
            tally = l[reversed_idx]

    if start_edges is None:
        start_edges = [s == 0 for s in start]
        reduce_edge_list(start_edges)
    if end_edges is None:
        end_edges = [e == (d - 1) for e, d in zip(end, dims)]
        reduce_edge_list(end_edges)

    # Base cases. Either start/end are empty and we're done, or the final,
    # one-dimensional tensor can be simply sliced
    if len(start) == 0:
        return [tuple()]
    elif len(start) == 1:
        return [(slice(start[0], end[0] + 1),)]

    slices = []
    path = []

    # Dimensions common to start and end can be selected directly
    for s, e in zip(start, end):
        if s == e:
            path.append(slice(s, s + 1))
        else:
            break

    path = tuple(path)
    divergence_idx = len(path)

    # start == end, and we're done
    if divergence_idx == len(dims):
        return [tuple(path)]

    def upper():
        # Everything from start to the end of start's subtree
        sdi = start[divergence_idx]
        return [
            path + (slice(sdi, sdi + 1),) + s for s in
            _get_minimal_slice_set(
                start[divergence_idx + 1:],
                [d - 1 for d in dims[divergence_idx + 1:]],
                dims[divergence_idx + 1:],
                start_edges=start_edges[divergence_idx + 1:],
                end_edges=[1 for _ in end_edges[divergence_idx + 1:]]
            )
        ]

    def lower():
        # Everything from the beginning of end's subtree up to end
        edi = end[divergence_idx]
        return [
            path + (slice(edi, edi + 1),) + s for s in
            _get_minimal_slice_set(
                [0 for _ in start[divergence_idx + 1:]],
                end[divergence_idx + 1:],
                dims[divergence_idx + 1:],
                start_edges=[1 for _ in start_edges[divergence_idx + 1:]],
                end_edges=end_edges[divergence_idx + 1:],
            )
        ]

    # If both start and end are at the edges of the subtree rooted at
    # divergence_idx, we can just select the whole subtree at once
    if start_edges[divergence_idx] and end_edges[divergence_idx]:
        slices.append(
            path + (slice(start[divergence_idx], end[divergence_idx] + 1),)
        )
    # If just start is at the edge, we can grab almost all of the subtree,
    # treating only the ragged bottom edge as an edge case
    elif start_edges[divergence_idx]:
        slices.append(
            path + (slice(start[divergence_idx], end[divergence_idx]),)
        )
        slices.extend(lower())
    # Analogous to the previous case, but the top is ragged this time
    elif end_edges[divergence_idx]:
        slices.extend(upper())
        slices.append(
            path + (slice(start[divergence_idx] + 1, end[divergence_idx] + 1),)
        )
    # If both sides of the range are ragged, we need to handle both sides
    # separately. If there's contiguous meat in between them, we can index it
    # in one big chunk
    else:
        slices.extend(upper())
        middle_ground = end[divergence_idx] - start[divergence_idx]
        if middle_ground > 1:
            slices.append(
                path + (slice(start[divergence_idx] + 1, end[divergence_idx]),)
            )
        slices.extend(lower())

    return [tuple(s) for s in slices]


@torch.jit.ignore
def _chunk_slice(
    t: torch.Tensor,
    flat_start: int,
    flat_end: int,
    no_batch_dims: int,
) -> torch.Tensor:
    """
        Equivalent to

            t.reshape((-1,) + t.shape[no_batch_dims:])[flat_start:flat_end]

        but without the need for the initial reshape call, which can be
        memory-intensive in certain situations. The only reshape operations
        in this function are performed on sub-tensors that scale with
        (flat_end - flat_start), the chunk size.
    """

    batch_dims = t.shape[:no_batch_dims]
    start_idx = list(_flat_idx_to_idx(flat_start, batch_dims))
    # _get_minimal_slice_set is inclusive
    end_idx = list(_flat_idx_to_idx(flat_end - 1, batch_dims))

    # Get an ordered list of slices to perform
    slices = _get_minimal_slice_set(
        start_idx,
        end_idx,
        batch_dims,
    )

    sliced_tensors = [t[s] for s in slices]

    return torch.cat(
        [s.view((-1,) + t.shape[no_batch_dims:]) for s in sliced_tensors]
    )


def chunk_layer(
    layer: Callable,
    inputs: Dict[str, Any],
    chunk_size: int,
    no_batch_dims: int,
    low_mem: bool = False,
) -> Any:
    """
    Implements the "chunking" procedure described in section 1.11.8.

    Layer outputs and inputs are assumed to be simple "pytrees,"
    consisting only of (arbitrarily nested) lists, tuples, and dicts with
    torch.Tensor leaves.

    Args:
        layer:
            The layer to be applied chunk-wise
        inputs:
            A (non-nested) dictionary of keyworded inputs. All leaves must
            be tensors and must share the same batch dimensions.
        chunk_size:
            The number of sub-batches per chunk. If multiple batch
            dimensions are specified, a "sub-batch" is defined as a single
            indexing of all batch dimensions simultaneously (s.t. the
            number of sub-batches is the product of the batch dimensions).
        no_batch_dims:
            How many of the initial dimensions of each input tensor can
            be considered batch dimensions.
        low_mem:
            Avoids flattening potentially large input tensors. Unnecessary
            in most cases, and is ever so slightly slower than the default
            setting.
    Returns:
        The reassembled output of the layer on the inputs.
    Raises:
        ValueError:
            If inputs is empty, or if the layer returns something other
            than a dict, a tuple or a tensor.
    """
    if not (len(inputs) > 0):
        raise ValueError("Must provide at least one input")

    initial_dims = [shape[:no_batch_dims] for shape in _fetch_dims(inputs)]
    # Per batch dimension, the largest size found among the inputs; inputs
    # with smaller batch dimensions are expanded to it below.
    orig_batch_dims = tuple([max(s) for s in zip(*initial_dims)])

    def _prep_inputs(t):
        # TODO: make this more memory efficient. This sucks
        if not low_mem:
            # Inputs whose batch dimensions are all 1 are left unexpanded
            # and later passed to every chunk as-is.
            if not sum(t.shape[:no_batch_dims]) == no_batch_dims:
                t = t.expand(orig_batch_dims + t.shape[no_batch_dims:])
            t = t.reshape(-1, *t.shape[no_batch_dims:])
        else:
            t = t.expand(orig_batch_dims + t.shape[no_batch_dims:])
        return t

    prepped_inputs = tensor_tree_map(_prep_inputs, inputs)

    flat_batch_dim = 1
    for d in orig_batch_dims:
        flat_batch_dim *= d

    # Ceiling division
    no_chunks = flat_batch_dim // chunk_size + (
        flat_batch_dim % chunk_size != 0
    )

    i = 0
    out = None
    for _ in range(no_chunks):
        # Chunk the input
        if not low_mem:
            select_chunk = (
                lambda t: t[i : i + chunk_size] if t.shape[0] != 1 else t
            )
        else:
            select_chunk = (
                partial(
                    _chunk_slice,
                    flat_start=i,
                    flat_end=min(flat_batch_dim, i + chunk_size),
                    no_batch_dims=len(orig_batch_dims)
                )
            )

        chunks = tensor_tree_map(select_chunk, prepped_inputs)

        # Run the layer on the chunk
        output_chunk = layer(**chunks)

        # Allocate space for the output
        if out is None:
            allocate = lambda t: t.new_zeros((flat_batch_dim,) + t.shape[1:])
            out = tensor_tree_map(allocate, output_chunk)

        # Put the chunk in its pre-allocated space
        out_type = type(output_chunk)
        if out_type is dict:
            def assign(d1, d2):
                for k, v in d1.items():
                    if type(v) is dict:
                        assign(v, d2[k])
                    else:
                        v[i : i + chunk_size] = d2[k]

            assign(out, output_chunk)
        elif out_type is tuple:
            for x1, x2 in zip(out, output_chunk):
                x1[i : i + chunk_size] = x2
        elif out_type is torch.Tensor:
            out[i : i + chunk_size] = output_chunk
        else:
            raise ValueError("Not supported")

        i += chunk_size

    reshape = lambda t: t.view(orig_batch_dims + t.shape[1:])
    out = tensor_tree_map(reshape, out)

    return out


# diff --git a/openfold/utils/validation_metrics.py b/openfold/utils/validation_metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..81b8da9860452030b6e9e1c2f46b8e49fdc17b75 --- /dev/null +++ b/openfold/utils/validation_metrics.py @@ -0,0 +1,38
@@ +# Copyright 2021 AlQuraishi Laboratory +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + + +def gdt(p1, p2, mask, cutoffs): + n = torch.sum(mask, dim=-1) + + p1 = p1.float() + p2 = p2.float() + distances = torch.sqrt(torch.sum((p1 - p2)**2, dim=-1)) + + scores = [] + for c in cutoffs: + score = torch.sum((distances <= c) * mask, dim=-1) / n + scores.append(score) + + return sum(scores) / len(scores) + + +def gdt_ts(p1, p2, mask): + return gdt(p1, p2, mask, [1., 2., 4., 8.]) + + +def gdt_ha(p1, p2, mask): + return gdt(p1, p2, mask, [0.5, 1., 2., 4.]) + diff --git a/pepflow/modules/common/geometry.py b/pepflow/modules/common/geometry.py new file mode 100644 index 0000000000000000000000000000000000000000..4fe178e333529cab3106db1815b5862186a38154 --- /dev/null +++ b/pepflow/modules/common/geometry.py @@ -0,0 +1,522 @@ +import torch +import torch.nn.functional as F + + +from pepflow.modules.protein.constants import ( + BBHeavyAtom, + backbone_atom_coordinates_tensor, + bb_oxygen_coordinate_tensor, +) +from pepflow.modules.common.topology import get_terminus_flag + + +def safe_norm(x, dim=-1, keepdim=False, eps=1e-8, sqrt=True): + out = torch.clamp(torch.sum(torch.square(x), dim=dim, keepdim=keepdim), min=eps) + return torch.sqrt(out) if sqrt else out + + +def align( + pos_1: torch.Tensor, + pos_2: torch.Tensor, + pos_mask: torch.Tensor, +) -> tuple[torch.Tensor, torch.Tensor]: + """(L,A,3),(L,A) align pos14_model to pos14_native, return aligned pos""" + L, 
A, _ = pos_1.shape + x = torch.masked_select(pos_1, pos_mask.bool().unsqueeze(-1)).reshape(-1, 3) + y = torch.masked_select(pos_2, pos_mask.bool().unsqueeze(-1)).reshape(-1, 3) + xm, ym = x.mean(dim=0), y.mean(dim=0) # (1,A,3) + x = x - x.mean(dim=0, keepdim=True) # (L,A,3) + y = y - y.mean(dim=0, keepdim=True) # (L,A,3) + s = x.T @ y + u, sigma, vt = torch.linalg.svd(s) + r = vt.T @ u.T # (3,3) + t = ym - r @ xm # + pos_1_aligned = ((r@pos_1.view(-1, 3).T).T + t).reshape(L, A, 3) # (-1,3) -> (L,A,3) + + return pos_1_aligned, pos_2 + +def batch_align( + pos_1: torch.Tensor, + pos_2: torch.Tensor, + pos_mask: torch.Tensor, +)-> tuple[torch.Tensor, torch.Tensor]: + """(B,L,A,3),(B,L,A) Batch align pos_1 to pos_2, return aligned pos_1 and pos_2""" + x = torch.masked_select(pos_1, pos_mask.unsqueeze(-1)).reshape(pos_1.size(0), -1, 3) + y = torch.masked_select(pos_2, pos_mask.unsqueeze(-1)).reshape(pos_2.size(0), -1, 3) + xm = x.mean(dim=1, keepdim=True) + ym = y.mean(dim=1, keepdim=True) + x = x - xm + y = y - ym + s = x.transpose(-1, -2) @ y + u, sigma, vt = torch.linalg.svd(s) + r = vt.transpose(-1, -2) @ u.transpose(-1, -2) + t = ym - (r @ xm.transpose(-1, -2)).transpose(-1, -2) + pos_1_aligned = ((r@pos_1.reshape(pos_1.size(0), -1, 3).transpose(-1, -2)).transpose(-1, -2) + t).reshape(pos_1.size(0), pos_1.size(1), -1, 3) + + return pos_1_aligned, pos_2 + + +def pairwise_distances(x, y=None, return_v=False): + """ + Args: + x: (B, N, d) + y: (B, M, d) + """ + if y is None: y = x + v = x.unsqueeze(2) - y.unsqueeze(1) # (B, N, M, d) + d = safe_norm(v, dim=-1) + if return_v: + return d, v + else: + return d + + +def normalize_vector(v, dim, eps=1e-6): + return v / (torch.linalg.norm(v, ord=2, dim=dim, keepdim=True) + eps) + + +def project_v2v(v, e, dim): + """ + Description: + Project vector `v` onto vector `e`. + Args: + v: (N, L, 3). + e: (N, L, 3). 
+ """ + return (e * v).sum(dim=dim, keepdim=True) * e + + +def construct_3d_basis(center, p1, p2): + """ + Args: + center: (N, L, 3), usually the position of C_alpha. + p1: (N, L, 3), usually the position of C. + p2: (N, L, 3), usually the position of N. + Returns + A batch of orthogonal basis matrix, (N, L, 3, 3cols_index). + The matrix is composed of 3 column vectors: [e1, e2, e3]. + """ + v1 = p1 - center # (N, L, 3) + e1 = normalize_vector(v1, dim=-1) + + v2 = p2 - center # (N, L, 3) + u2 = v2 - project_v2v(v2, e1, dim=-1) + e2 = normalize_vector(u2, dim=-1) + + e3 = torch.cross(e1, e2, dim=-1) # (N, L, 3) + + mat = torch.cat([ + e1.unsqueeze(-1), e2.unsqueeze(-1), e3.unsqueeze(-1) + ], dim=-1) # (N, L, 3, 3_index) + return mat + + +def local_to_global(R, t, p): + """ + Description: + Convert local (internal) coordinates to global (external) coordinates q. + q <- Rp + t + Args: + R: (N, L, 3, 3). + t: (N, L, 3). + p: Local coordinates, (N, L, ..., 3). + Returns: + q: Global coordinates, (N, L, ..., 3). + """ + assert p.size(-1) == 3 + p_size = p.size() + N, L = p_size[0], p_size[1] + + p = p.view(N, L, -1, 3).transpose(-1, -2) # (N, L, *, 3) -> (N, L, 3, *) + q = torch.matmul(R, p) + t.unsqueeze(-1) # (N, L, 3, *) + q = q.transpose(-1, -2).reshape(p_size) # (N, L, 3, *) -> (N, L, *, 3) -> (N, L, ..., 3) + return q + + +def global_to_local(R, t, q): + """ + Description: + Convert global (external) coordinates q to local (internal) coordinates p. + p <- R^{T}(q - t) + Args: + R: (N, L, 3, 3). + t: (N, L, 3). + q: Global coordinates, (N, L, ..., 3). + Returns: + p: Local coordinates, (N, L, ..., 3). 
+ """ + assert q.size(-1) == 3 + q_size = q.size() + N, L = q_size[0], q_size[1] + + q = q.reshape(N, L, -1, 3).transpose(-1, -2) # (N, L, *, 3) -> (N, L, 3, *) + p = torch.matmul(R.transpose(-1, -2), (q - t.unsqueeze(-1))) # (N, L, 3, *) + p = p.transpose(-1, -2).reshape(q_size) # (N, L, 3, *) -> (N, L, *, 3) -> (N, L, ..., 3) + return p + + +def apply_rotation_to_vector(R, p): + return local_to_global(R, torch.zeros_like(p), p) + + +def compose_rotation_and_translation(R1, t1, R2, t2): + """ + Args: + R1,t1: Frame basis and coordinate, (N, L, 3, 3), (N, L, 3). + R2,t2: Rotation and translation to be applied to (R1, t1), (N, L, 3, 3), (N, L, 3). + Returns + R_new <- R1R2 + t_new <- R1t2 + t1 + """ + R_new = torch.matmul(R1, R2) # (N, L, 3, 3) + t_new = torch.matmul(R1, t2.unsqueeze(-1)).squeeze(-1) + t1 + return R_new, t_new + + +def compose_chain(Ts): + while len(Ts) >= 2: + R1, t1 = Ts[-2] + R2, t2 = Ts[-1] + T_next = compose_rotation_and_translation(R1, t1, R2, t2) + Ts = Ts[:-2] + [T_next] + return Ts[0] + + +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +def quaternion_to_rotation_matrix(quaternions): + """ + Convert rotations given as quaternions to rotation matrices. + Args: + quaternions: quaternions with real part first, + as tensor of shape (..., 4). + Returns: + Rotation matrices as tensor of shape (..., 3, 3). 
+ """ + quaternions = F.normalize(quaternions, dim=-1) + r, i, j, k = torch.unbind(quaternions, -1) + two_s = 2.0 / (quaternions * quaternions).sum(-1) + + o = torch.stack( + ( + 1 - two_s * (j * j + k * k), + two_s * (i * j - k * r), + two_s * (i * k + j * r), + two_s * (i * j + k * r), + 1 - two_s * (i * i + k * k), + two_s * (j * k - i * r), + two_s * (i * k - j * r), + two_s * (j * k + i * r), + 1 - two_s * (i * i + j * j), + ), + -1, + ) + return o.reshape(quaternions.shape[:-1] + (3, 3)) + + +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +""" +BSD License + +For PyTorch3D software + +Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name Meta nor the names of its contributors may be used to + endorse or promote products derived from this software without specific + prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" +def quaternion_1ijk_to_rotation_matrix(q): + """ + (1 + ai + bj + ck) -> R + Args: + q: (..., 3) + """ + b, c, d = torch.unbind(q, dim=-1) + s = torch.sqrt(1 + b**2 + c**2 + d**2) + a, b, c, d = 1/s, b/s, c/s, d/s + + o = torch.stack( + ( + a**2 + b**2 - c**2 - d**2, 2*b*c - 2*a*d, 2*b*d + 2*a*c, + 2*b*c + 2*a*d, a**2 - b**2 + c**2 - d**2, 2*c*d - 2*a*b, + 2*b*d - 2*a*c, 2*c*d + 2*a*b, a**2 - b**2 - c**2 + d**2, + ), + -1, + ) + return o.reshape(q.shape[:-1] + (3, 3)) + + +def repr_6d_to_rotation_matrix(x): + """ + Args: + x: 6D representations, (..., 6). + Returns: + Rotation matrices, (..., 3, 3_index). + """ + a1, a2 = x[..., 0:3], x[..., 3:6] + b1 = normalize_vector(a1, dim=-1) + b2 = normalize_vector(a2 - project_v2v(a2, b1, dim=-1), dim=-1) + b3 = torch.cross(b1, b2, dim=-1) + + mat = torch.cat([ + b1.unsqueeze(-1), b2.unsqueeze(-1), b3.unsqueeze(-1) + ], dim=-1) # (N, L, 3, 3_index) + return mat + + +def dihedral_from_four_points(p0, p1, p2, p3): + """ + Args: + p0-3: (*, 3). + Returns: + Dihedral angles in radian, (*, ). 
+ """ + v0 = p2 - p1 + v1 = p0 - p1 + v2 = p3 - p2 + u1 = torch.cross(v0, v1, dim=-1) + n1 = u1 / torch.linalg.norm(u1, dim=-1, keepdim=True) + u2 = torch.cross(v0, v2, dim=-1) + n2 = u2 / torch.linalg.norm(u2, dim=-1, keepdim=True) + sgn = torch.sign( (torch.cross(v1, v2, dim=-1) * v0).sum(-1) ) + dihed = sgn*torch.acos( (n1 * n2).sum(-1).clamp(min=-0.999999, max=0.999999) ) + dihed = torch.nan_to_num(dihed) + return dihed + + +def knn_gather(idx, value): + """ + Args: + idx: (B, N, K) + value: (B, M, d) + Returns: + (B, N, K, d) + """ + N, d = idx.size(1), value.size(-1) + idx = idx.unsqueeze(-1).repeat(1, 1, 1, d) # (B, N, K, d) + value = value.unsqueeze(1).repeat(1, N, 1, 1) # (B, N, M, d) + return torch.gather(value, dim=2, index=idx) + + +def knn_points(q, p, K): + """ + Args: + q: (B, M, d) + p: (B, N, d) + Returns: + (B, M, K), (B, M, K), (B, M, K, d) + """ + _, L, _ = p.size() + d = pairwise_distances(q, p) # (B, N, M) + dist, idx = d.topk(min(L, K), dim=-1, largest=False) # (B, M, K), (B, M, K) + return dist, idx, knn_gather(idx, p) + + +def angstrom_to_nm(x): + return x / 10 + + +def nm_to_angstrom(x): + return x * 10 + + +def get_backbone_dihedral_angles(pos_atoms, chain_nb, res_nb, mask): + """ + Args: + pos_atoms: (N, L, A, 3). + chain_nb: (N, L). + res_nb: (N, L). + mask: (N, L). + Returns: + bb_dihedral: Omega, Phi, and Psi angles in radian, (N, L, 3). + mask_bb_dihed: Masks of dihedral angles, (N, L, 3). 
+ """ + pos_N = pos_atoms[:, :, BBHeavyAtom.N] # (N, L, 3) + pos_CA = pos_atoms[:, :, BBHeavyAtom.CA] + pos_C = pos_atoms[:, :, BBHeavyAtom.C] + + N_term_flag, C_term_flag = get_terminus_flag(chain_nb, res_nb, mask) # (N, L) + omega_mask = torch.logical_not(N_term_flag) + phi_mask = torch.logical_not(N_term_flag) + psi_mask = torch.logical_not(C_term_flag) + + # N-termini don't have omega and phi + omega = F.pad( + dihedral_from_four_points(pos_CA[:, :-1], pos_C[:, :-1], pos_N[:, 1:], pos_CA[:, 1:]), + pad=(1, 0), value=0, + ) + phi = F.pad( + dihedral_from_four_points(pos_C[:, :-1], pos_N[:, 1:], pos_CA[:, 1:], pos_C[:, 1:]), + pad=(1, 0), value=0, + ) + + # C-termini don't have psi + psi = F.pad( + dihedral_from_four_points(pos_N[:, :-1], pos_CA[:, :-1], pos_C[:, :-1], pos_N[:, 1:]), + pad=(0, 1), value=0, + ) + + mask_bb_dihed = torch.stack([omega_mask, phi_mask, psi_mask], dim=-1) + bb_dihedral = torch.stack([omega, phi, psi], dim=-1) * mask_bb_dihed + return bb_dihedral, mask_bb_dihed + + +def pairwise_dihedrals(pos_atoms): + """ + Args: + pos_atoms: (N, L, A, 3). + Returns: + Inter-residue Phi and Psi angles, (N, L, L, 2). + """ + N, L = pos_atoms.shape[:2] + pos_N = pos_atoms[:, :, BBHeavyAtom.N] # (N, L, 3) + pos_CA = pos_atoms[:, :, BBHeavyAtom.CA] + pos_C = pos_atoms[:, :, BBHeavyAtom.C] + + ir_phi = dihedral_from_four_points( + pos_C[:,:,None].expand(N, L, L, 3), + pos_N[:,None,:].expand(N, L, L, 3), + pos_CA[:,None,:].expand(N, L, L, 3), + pos_C[:,None,:].expand(N, L, L, 3) + ) + ir_psi = dihedral_from_four_points( + pos_N[:,:,None].expand(N, L, L, 3), + pos_CA[:,:,None].expand(N, L, L, 3), + pos_C[:,:,None].expand(N, L, L, 3), + pos_N[:,None,:].expand(N, L, L, 3) + ) + ir_dihed = torch.stack([ir_phi, ir_psi], dim=-1) + return ir_dihed + + +def apply_rotation_matrix_to_rot6d(R, O): + """ + Args: + R: (..., 3, 3) + O: (..., 6) + Returns: + Rotated 6D representation, (..., 6). 
+ """ + u1, u2 = O[..., :3, None], O[..., 3:, None] # (..., 3, 1) + v1 = torch.matmul(R, u1).squeeze(-1) # (..., 3) + v2 = torch.matmul(R, u2).squeeze(-1) + return torch.cat([v1, v2], dim=-1) + + +def normalize_rot6d(O): + """ + Args: + O: (..., 6) + """ + u1, u2 = O[..., :3], O[..., 3:] # (..., 3) + v1 = F.normalize(u1, p=2, dim=-1) # (..., 3) + v2 = F.normalize(u2 - project_v2v(u2, v1), p=2, dim=-1) + return torch.cat([v1, v2], dim=-1) + + +def reconstruct_backbone(R, t, aa, chain_nb, res_nb, mask): + """ + Args: + R: (N, L, 3, 3) + t: (N, L, 3) + aa: (N, L) + chain_nb: (N, L) + res_nb: (N, L) + mask: (N, L) + Returns: + Reconstructed backbone atoms, (N, L, 4, 3). + """ + N, L = aa.size() + # atom_coords = restype_heavyatom_rigid_group_positions.clone().to(t) # (21, 14, 3) + bb_coords = backbone_atom_coordinates_tensor.clone().to(t) # (21, 3, 3) + oxygen_coord = bb_oxygen_coordinate_tensor.clone().to(t) # (21, 3) + aa = aa.clamp(min=0, max=20) # 20 for UNK + + bb_coords = bb_coords[aa.flatten()].reshape(N, L, -1, 3) # (N, L, 3, 3) + oxygen_coord = oxygen_coord[aa.flatten()].reshape(N, L, -1) # (N, L, 3) + bb_pos = local_to_global(R, t, bb_coords) # Global coordinates of N, CA, C. (N, L, 3, 3). + + # Compute PSI angle + bb_dihedral, _ = get_backbone_dihedral_angles(bb_pos, chain_nb, res_nb, mask) + psi = bb_dihedral[..., 2] # (N, L) + # Make rotation matrix for PSI + sin_psi = torch.sin(psi).reshape(N, L, 1, 1) + cos_psi = torch.cos(psi).reshape(N, L, 1, 1) + zero = torch.zeros_like(sin_psi) + one = torch.ones_like(sin_psi) + row1 = torch.cat([one, zero, zero], dim=-1) # (N, L, 1, 3) + row2 = torch.cat([zero, cos_psi, -sin_psi], dim=-1) # (N, L, 1, 3) + row3 = torch.cat([zero, sin_psi, cos_psi], dim=-1) # (N, L, 1, 3) + R_psi = torch.cat([row1, row2, row3], dim=-2) # (N, L, 3, 3) + + # Compute rotoation and translation of PSI frame, and position of O. 
+ R_psi, t_psi = compose_chain([ + (R, t), # Backbone + (R_psi, torch.zeros_like(t)), # PSI angle + ]) + O_pos = local_to_global(R_psi, t_psi, oxygen_coord.reshape(N, L, 1, 3)) + + bb_pos = torch.cat([bb_pos, O_pos], dim=2) # (N, L, 4, 3) + return bb_pos + + +def reconstruct_backbone_partially(pos_ctx, R_new, t_new, aa, chain_nb, res_nb, mask_atoms, mask_recons): + """ + Args: + pos: (N, L, A, 3). + R_new: (N, L, 3, 3). + t_new: (N, L, 3). + mask_atoms: (N, L, A). + mask_recons:(N, L). + Returns: + pos_new: (N, L, A, 3). + mask_new: (N, L, A). + """ + N, L, A = mask_atoms.size() + + mask_res = mask_atoms[:, :, BBHeavyAtom.CA] + pos_recons = reconstruct_backbone(R_new, t_new, aa, chain_nb, res_nb, mask_res) # (N, L, 4, 3) + pos_recons = F.pad(pos_recons, pad=(0, 0, 0, A-4), value=0) # (N, L, A, 3) + + pos_new = torch.where( + mask_recons[:, :, None, None].expand_as(pos_ctx), + pos_recons, pos_ctx + ) # (N, L, A, 3) + + mask_bb_atoms = torch.zeros_like(mask_atoms) + mask_bb_atoms[:, :, :4] = True + mask_new = torch.where( + mask_recons[:, :, None].expand_as(mask_atoms), + mask_bb_atoms, mask_atoms + ) + + return pos_new, mask_new diff --git a/pepflow/modules/common/layers.py b/pepflow/modules/common/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..43109cd52ec185eddcd635e3cc9e691b8a39bbc8 --- /dev/null +++ b/pepflow/modules/common/layers.py @@ -0,0 +1,167 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +def mask_zero(mask, value): + return torch.where(mask, value, torch.zeros_like(value)) + + +def clampped_one_hot(x, num_classes): + mask = (x >= 0) & (x < num_classes) # (N, L) + x = x.clamp(min=0, max=num_classes-1) + y = F.one_hot(x, num_classes) * mask[...,None] # (N, L, C) + return y + + +def sample_from(c): + """sample from c""" + N,L,K = c.size() + c = c.view(N*L,K) + 1e-8 + x = torch.multinomial(c,1).view(N,L) + return x + +class DistanceToBins(nn.Module): + + def __init__(self, dist_min=0.0, 
dist_max=20.0, num_bins=64, use_onehot=False): + super().__init__() + self.dist_min = dist_min + self.dist_max = dist_max + self.num_bins = num_bins + self.use_onehot = use_onehot + + if use_onehot: + offset = torch.linspace(dist_min, dist_max, self.num_bins) + else: + offset = torch.linspace(dist_min, dist_max, self.num_bins-1) # 1 overflow flag + self.coeff = -0.5 / ((offset[1] - offset[0]) * 0.2).item() ** 2 # `*0.2`: makes it not too blurred + self.register_buffer('offset', offset) + + @property + def out_channels(self): + return self.num_bins + + def forward(self, dist, dim, normalize=True): + """ + Args: + dist: (N, *, 1, *) + Returns: + (N, *, num_bins, *) + """ + assert dist.size()[dim] == 1 + offset_shape = [1] * len(dist.size()) + offset_shape[dim] = -1 + + if self.use_onehot: + diff = torch.abs(dist - self.offset.view(*offset_shape)) # (N, *, num_bins, *) + bin_idx = torch.argmin(diff, dim=dim, keepdim=True) # (N, *, 1, *) + y = torch.zeros_like(diff).scatter_(dim=dim, index=bin_idx, value=1.0) + else: + overflow_symb = (dist >= self.dist_max).float() # (N, *, 1, *) + y = dist - self.offset.view(*offset_shape) # (N, *, num_bins-1, *) + y = torch.exp(self.coeff * torch.pow(y, 2)) # (N, *, num_bins-1, *) + y = torch.cat([y, overflow_symb], dim=dim) # (N, *, num_bins, *) + if normalize: + y = y / y.sum(dim=dim, keepdim=True) + + return y + + +class PositionalEncoding(nn.Module): + + def __init__(self, num_funcs=6): + super().__init__() + self.num_funcs = num_funcs + self.register_buffer('freq_bands', 2.0 ** torch.linspace(0.0, num_funcs-1, num_funcs)) + + def get_out_dim(self, in_dim): + return in_dim * (2 * self.num_funcs + 1) + + def forward(self, x): + """ + Args: + x: (..., d). 
+ """ + shape = list(x.shape[:-1]) + [-1] + x = x.unsqueeze(-1) # (..., d, 1) + code = torch.cat([x, torch.sin(x * self.freq_bands), torch.cos(x * self.freq_bands)], dim=-1) # (..., d, 2f+1) + code = code.reshape(shape) + return code + + +class AngularEncoding(nn.Module): + + def __init__(self, num_funcs=3): + super().__init__() + self.num_funcs = num_funcs + self.register_buffer('freq_bands', torch.FloatTensor( + [i+1 for i in range(num_funcs)] + [1./(i+1) for i in range(num_funcs)] + )) + + def get_out_dim(self, in_dim): + return in_dim * (1 + 2*2*self.num_funcs) + + def forward(self, x): + """ + Args: + x: (..., d). + """ + shape = list(x.shape[:-1]) + [-1] + x = x.unsqueeze(-1) # (..., d, 1) + code = torch.cat([x, torch.sin(x * self.freq_bands), torch.cos(x * self.freq_bands)], dim=-1) # (..., d, 2f+1) + code = code.reshape(shape) + return code + + +class LayerNorm(nn.Module): + + def __init__(self, + normal_shape, + gamma=True, + beta=True, + epsilon=1e-10): + """Layer normalization layer + See: [Layer Normalization](https://arxiv.org/pdf/1607.06450.pdf) + :param normal_shape: The shape of the input tensor or the last dimension of the input tensor. + :param gamma: Add a scale parameter if it is True. + :param beta: Add an offset parameter if it is True. + :param epsilon: Epsilon for calculating variance. 
+ """ + super().__init__() + if isinstance(normal_shape, int): + normal_shape = (normal_shape,) + else: + normal_shape = (normal_shape[-1],) + self.normal_shape = torch.Size(normal_shape) + self.epsilon = epsilon + if gamma: + self.gamma = nn.Parameter(torch.Tensor(*normal_shape)) + else: + self.register_parameter('gamma', None) + if beta: + self.beta = nn.Parameter(torch.Tensor(*normal_shape)) + else: + self.register_parameter('beta', None) + self.reset_parameters() + + def reset_parameters(self): + if self.gamma is not None: + self.gamma.data.fill_(1) + if self.beta is not None: + self.beta.data.zero_() + + def forward(self, x): + mean = x.mean(dim=-1, keepdim=True) + var = ((x - mean) ** 2).mean(dim=-1, keepdim=True) + std = (var + self.epsilon).sqrt() + y = (x - mean) / std + if self.gamma is not None: + y *= self.gamma + if self.beta is not None: + y += self.beta + return y + + def extra_repr(self): + return 'normal_shape={}, gamma={}, beta={}, epsilon={}'.format( + self.normal_shape, self.gamma is not None, self.beta is not None, self.epsilon, + ) diff --git a/pepflow/modules/common/so3.py b/pepflow/modules/common/so3.py new file mode 100644 index 0000000000000000000000000000000000000000..6eff844462b342b019a5ac0d9664d5a54c9a7642 --- /dev/null +++ b/pepflow/modules/common/so3.py @@ -0,0 +1,146 @@ +import math +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from pepflow.modules.common.geometry import quaternion_to_rotation_matrix + + +def log_rotation(R): + trace = R[..., range(3), range(3)].sum(-1) + if torch.is_grad_enabled(): + # The derivative of acos at -1.0 is -inf, so to stablize the gradient, we use -0.9999 + min_cos = -0.999 + else: + min_cos = -1.0 + cos_theta = ( (trace-1) / 2 ).clamp_min(min=min_cos) + sin_theta = torch.sqrt(1 - cos_theta**2) + theta = torch.acos(cos_theta) + coef = ((theta+1e-8)/(2*sin_theta+2e-8))[..., None, None] + logR = coef * (R - R.transpose(-1, -2)) + return logR + + +def 
skewsym_to_so3vec(S): + x = S[..., 1, 2] + y = S[..., 2, 0] + z = S[..., 0, 1] + w = torch.stack([x,y,z], dim=-1) + return w + + +def so3vec_to_skewsym(w): + x, y, z = torch.unbind(w, dim=-1) + o = torch.zeros_like(x) + S = torch.stack([ + o, z, -y, + -z, o, x, + y, -x, o, + ], dim=-1).reshape(w.shape[:-1] + (3, 3)) + return S + + +def exp_skewsym(S): + x = torch.linalg.norm(skewsym_to_so3vec(S), dim=-1) + I = torch.eye(3).to(S).view([1 for _ in range(S.dim()-2)] + [3, 3]) + + sinx, cosx = torch.sin(x), torch.cos(x) + b = (sinx + 1e-8) / (x + 1e-8) + c = (1-cosx + 1e-8) / (x**2 + 2e-8) # lim_{x->0} (1-cosx)/(x^2) = 0.5 + + S2 = S @ S + return I + b[..., None, None]*S + c[..., None, None]*S2 + + +def so3vec_to_rotation(w): + return exp_skewsym(so3vec_to_skewsym(w)) + + +def rotation_to_so3vec(R): + logR = log_rotation(R) + w = skewsym_to_so3vec(logR) + return w + + +def random_uniform_so3(size, device='cpu'): + q = F.normalize(torch.randn(list(size)+[4,], device=device), dim=-1) # (..., 4) + return rotation_to_so3vec(quaternion_to_rotation_matrix(q)) + + +class ApproxAngularDistribution(nn.Module): + + def __init__(self, stddevs, std_threshold=0.1, num_bins=8192, num_iters=1024): + super().__init__() + self.std_threshold = std_threshold + self.num_bins = num_bins + self.num_iters = num_iters + self.register_buffer('stddevs', torch.FloatTensor(stddevs)) + self.register_buffer('approx_flag', self.stddevs <= std_threshold) + self._precompute_histograms() + + @staticmethod + def _pdf(x, e, L): + """ + Args: + x: (N, ) + e: Float + L: Integer + """ + x = x[:, None] # (N, *) + c = ((1 - torch.cos(x)) / math.pi) # (N, *) + l = torch.arange(0, L)[None, :] # (*, L) + a = (2*l+1) * torch.exp(-l*(l+1)*(e**2)) # (*, L) + b = (torch.sin( (l+0.5)* x ) + 1e-6) / (torch.sin( x / 2 ) + 1e-6) # (N, L) + + f = (c * a * b).sum(dim=1) + return f + + def _precompute_histograms(self): + X, Y = [], [] + for std in self.stddevs: + std = std.item() + x = torch.linspace(0, math.pi, 
self.num_bins) # (n_bins,) + y = self._pdf(x, std, self.num_iters) # (n_bins,) + y = torch.nan_to_num(y).clamp_min(0) + X.append(x) + Y.append(y) + self.register_buffer('X', torch.stack(X, dim=0)) # (n_stddevs, n_bins) + self.register_buffer('Y', torch.stack(Y, dim=0)) # (n_stddevs, n_bins) + + def sample(self, std_idx): + """ + Args: + std_idx: Indices of standard deviation. + Returns: + samples: Angular samples [0, PI), same size as std. + """ + size = std_idx.size() + std_idx = std_idx.flatten() # (N,) + + # Samples from histogram + prob = self.Y[std_idx] # (N, n_bins) + bin_idx = torch.multinomial(prob[:, :-1], num_samples=1).squeeze(-1) # (N,) + bin_start = self.X[std_idx, bin_idx] # (N,) + bin_width = self.X[std_idx, bin_idx+1] - self.X[std_idx, bin_idx] + samples_hist = bin_start + torch.rand_like(bin_start) * bin_width # (N,) + + # Samples from Gaussian approximation + mean_gaussian = self.stddevs[std_idx]*2 + std_gaussian = self.stddevs[std_idx] + samples_gaussian = mean_gaussian + torch.randn_like(mean_gaussian) * std_gaussian + samples_gaussian = samples_gaussian.abs() % math.pi + + # Choose from histogram or Gaussian + gaussian_flag = self.approx_flag[std_idx] + samples = torch.where(gaussian_flag, samples_gaussian, samples_hist) + + return samples.reshape(size) + + +def random_normal_so3(std_idx, angular_distrib, device='cpu'): + size = std_idx.size() + u = F.normalize(torch.randn(list(size)+[3,], device=device), dim=-1) + theta = angular_distrib.sample(std_idx) + w = u * theta[..., None] + return w diff --git a/pepflow/modules/common/structure.py b/pepflow/modules/common/structure.py new file mode 100644 index 0000000000000000000000000000000000000000..8b638f33dbcf401c4938dd2da62842b17f5cad3f --- /dev/null +++ b/pepflow/modules/common/structure.py @@ -0,0 +1,77 @@ +import torch +from torch.nn import Module, Linear, LayerNorm, Sequential, ReLU + +from pepflow.modules.common.geometry import compose_rotation_and_translation, quaternion_to_rotation_matrix, 
repr_6d_to_rotation_matrix + + +class FrameRotationTranslationPrediction(Module): + + def __init__(self, feat_dim, rot_repr, nn_type='mlp'): + super().__init__() + assert rot_repr in ('quaternion', '6d') + self.rot_repr = rot_repr + if rot_repr == 'quaternion': + out_dim = 3 + 3 + elif rot_repr == '6d': + out_dim = 6 + 3 + + if nn_type == 'linear': + self.nn = Linear(feat_dim, out_dim) + elif nn_type == 'mlp': + self.nn = Sequential( + Linear(feat_dim, feat_dim), ReLU(), + Linear(feat_dim, feat_dim), ReLU(), + Linear(feat_dim, out_dim) + ) + else: + raise ValueError('Unknown nn_type: %s' % nn_type) + + def forward(self, x): + y = self.nn(x) # (..., d+3) + if self.rot_repr == 'quaternion': + quaternion = torch.cat([torch.ones_like(y[..., :1]), y[..., 0:3]], dim=-1) + R_delta = quaternion_to_rotation_matrix(quaternion) + t_delta = y[..., 3:6] + return R_delta, t_delta + elif self.rot_repr == '6d': + R_delta = repr_6d_to_rotation_matrix(y[..., 0:6]) + t_delta = y[..., 6:9] + return R_delta, t_delta + + +class FrameUpdate(Module): + + def __init__(self, node_feat_dim, rot_repr='quaternion', rot_tran_nn_type='mlp'): + super().__init__() + self.transition_mlp = Sequential( + Linear(node_feat_dim, node_feat_dim), ReLU(), + Linear(node_feat_dim, node_feat_dim), ReLU(), + Linear(node_feat_dim, node_feat_dim), + ) + self.transition_layer_norm = LayerNorm(node_feat_dim) + + self.rot_tran = FrameRotationTranslationPrediction(node_feat_dim, rot_repr, nn_type=rot_tran_nn_type) + + def forward(self, R, t, x, mask_generate): + """ + Args: + R: Frame basis matrices, (N, L, 3, 3_index). + t: Frame external (absolute) coordinates, (N, L, 3). Unit: Angstrom. + x: Node-wise features, (N, L, F). + mask_generate: Masks, (N, L). + Returns: + R': Updated basis matrices, (N, L, 3, 3_index). + t': Updated coordinates, (N, L, 3). 
+ """ + x = self.transition_layer_norm(x + self.transition_mlp(x)) + + R_delta, t_delta = self.rot_tran(x) # (N, L, 3, 3), (N, L, 3) + R_new, t_new = compose_rotation_and_translation(R, t, R_delta, t_delta) + + mask_R = mask_generate[:, :, None, None].expand_as(R) + mask_t = mask_generate[:, :, None].expand_as(t) + + R_new = torch.where(mask_R, R_new, R) + t_new = torch.where(mask_t, t_new, t) + + return R_new, t_new diff --git a/pepflow/modules/common/topology.py b/pepflow/modules/common/topology.py new file mode 100644 index 0000000000000000000000000000000000000000..c1249882e86b8ab5d06d2f360b4502b344b9a0c7 --- /dev/null +++ b/pepflow/modules/common/topology.py @@ -0,0 +1,24 @@ +import torch +import torch.nn.functional as F + + +def get_consecutive_flag(chain_nb, res_nb, mask): + """ + Args: + chain_nb, res_nb + Returns: + consec: A flag tensor indicating whether residue-i is connected to residue-(i+1), + BoolTensor, (B, L-1)[b, i]. + """ + d_res_nb = (res_nb[:, 1:] - res_nb[:, :-1]).abs() # (B, L-1) + same_chain = (chain_nb[:, 1:] == chain_nb[:, :-1]) + consec = torch.logical_and(d_res_nb == 1, same_chain) + consec = torch.logical_and(consec, mask[:, :-1]) + return consec + + +def get_terminus_flag(chain_nb, res_nb, mask): + consec = get_consecutive_flag(chain_nb, res_nb, mask) + N_term_flag = F.pad(torch.logical_not(consec), pad=(1, 0), value=1) + C_term_flag = F.pad(torch.logical_not(consec), pad=(0, 1), value=1) + return N_term_flag, C_term_flag diff --git a/pepflow/modules/protein/constants.py b/pepflow/modules/protein/constants.py new file mode 100644 index 0000000000000000000000000000000000000000..5a3e52501d843c5ccaa99229762f8c4386782c01 --- /dev/null +++ b/pepflow/modules/protein/constants.py @@ -0,0 +1,888 @@ +import torch +import enum + + +## others +NM_TO_ANG_SCALE = 10.0 +ANG_TO_NM_SCALE = 1 / NM_TO_ANG_SCALE + +PAD_RESIDUE_INDEX = 21 + +## +# Residue identities + +non_standard_residue_substitutions = { + '2AS':'ASP', '3AH':'HIS', '5HP':'GLU', 
'ACL':'ARG', 'AGM':'ARG', 'AIB':'ALA', 'ALM':'ALA', 'ALO':'THR', 'ALY':'LYS', 'ARM':'ARG', + 'ASA':'ASP', 'ASB':'ASP', 'ASK':'ASP', 'ASL':'ASP', 'ASQ':'ASP', 'AYA':'ALA', 'BCS':'CYS', 'BHD':'ASP', 'BMT':'THR', 'BNN':'ALA', + 'BUC':'CYS', 'BUG':'LEU', 'C5C':'CYS', 'C6C':'CYS', 'CAS':'CYS', 'CCS':'CYS', 'CEA':'CYS', 'CGU':'GLU', 'CHG':'ALA', 'CLE':'LEU', 'CME':'CYS', + 'CSD':'ALA', 'CSO':'CYS', 'CSP':'CYS', 'CSS':'CYS', 'CSW':'CYS', 'CSX':'CYS', 'CXM':'MET', 'CY1':'CYS', 'CY3':'CYS', 'CYG':'CYS', + 'CYM':'CYS', 'CYQ':'CYS', 'DAH':'PHE', 'DAL':'ALA', 'DAR':'ARG', 'DAS':'ASP', 'DCY':'CYS', 'DGL':'GLU', 'DGN':'GLN', 'DHA':'ALA', + 'DHI':'HIS', 'DIL':'ILE', 'DIV':'VAL', 'DLE':'LEU', 'DLY':'LYS', 'DNP':'ALA', 'DPN':'PHE', 'DPR':'PRO', 'DSN':'SER', 'DSP':'ASP', + 'DTH':'THR', 'DTR':'TRP', 'DTY':'TYR', 'DVA':'VAL', 'EFC':'CYS', 'FLA':'ALA', 'FME':'MET', 'GGL':'GLU', 'GL3':'GLY', 'GLZ':'GLY', + 'GMA':'GLU', 'GSC':'GLY', 'HAC':'ALA', 'HAR':'ARG', 'HIC':'HIS', 'HIP':'HIS', 'HMR':'ARG', 'HPQ':'PHE', 'HTR':'TRP', 'HYP':'PRO', + 'IAS':'ASP', 'IIL':'ILE', 'IYR':'TYR', 'KCX':'LYS', 'LLP':'LYS', 'LLY':'LYS', 'LTR':'TRP', 'LYM':'LYS', 'LYZ':'LYS', 'MAA':'ALA', 'MEN':'ASN', + 'MHS':'HIS', 'MIS':'SER', 'MLE':'LEU', 'MPQ':'GLY', 'MSA':'GLY', 'MSE':'MET', 'MVA':'VAL', 'NEM':'HIS', 'NEP':'HIS', 'NLE':'LEU', + 'NLN':'LEU', 'NLP':'LEU', 'NMC':'GLY', 'OAS':'SER', 'OCS':'CYS', 'OMT':'MET', 'PAQ':'TYR', 'PCA':'GLU', 'PEC':'CYS', 'PHI':'PHE', + 'PHL':'PHE', 'PR3':'CYS', 'PRR':'ALA', 'PTR':'TYR', 'PYX':'CYS', 'SAC':'SER', 'SAR':'GLY', 'SCH':'CYS', 'SCS':'CYS', 'SCY':'CYS', + 'SEL':'SER', 'SEP':'SER', 'SET':'SER', 'SHC':'CYS', 'SHR':'LYS', 'SMC':'CYS', 'SOC':'CYS', 'STY':'TYR', 'SVA':'SER', 'TIH':'ALA', + 'TPL':'TRP', 'TPO':'THR', 'TPQ':'ALA', 'TRG':'LYS', 'TRO':'TRP', 'TYB':'TYR', 'TYI':'TYR', 'TYQ':'TYR', 'TYS':'TYR', 'TYY':'TYR', + 'ALA':'ALA', 'CYS':'CYS', 'ASP':'ASP', 'GLU':'GLU', 'PHE':'PHE', 'GLY':'GLY', 'HIS':'HIS', 'ILE':'ILE', 'LYS':'LYS', 'LEU':'LEU', + 'MET':'MET', 'ASN':'ASN', 
'PRO':'PRO', 'GLN':'GLN', 'ARG':'ARG', 'SER':'SER', 'THR':'THR', 'VAL':'VAL', 'TRP':'TRP', 'TYR':'TYR', + 'UNK':'UNK' +} + + +ressymb_to_resindex = { + 'A': 0, 'C': 1, 'D': 2, 'E': 3, 'F': 4, + 'G': 5, 'H': 6, 'I': 7, 'K': 8, 'L': 9, + 'M': 10, 'N': 11, 'P': 12, 'Q': 13, 'R': 14, + 'S': 15, 'T': 16, 'V': 17, 'W': 18, 'Y': 19, + 'X': 20, +} + +resindex_to_ressymb = {} +for k,v in ressymb_to_resindex.items(): resindex_to_ressymb[v] = k + +BACKBONE_FRAME = 0 +OMEGA_FRAME = 1 +PHI_FRAME = 2 +PSI_FRAME = 3 +CHI1_FRAME, CHI2_FRAME, CHI3_FRAME, CHI4_FRAME = 4, 5, 6, 7 + + +class AA(enum.IntEnum): + ALA = 0; CYS = 1; ASP = 2; GLU = 3; PHE = 4 + GLY = 5; HIS = 6; ILE = 7; LYS = 8; LEU = 9 + MET = 10; ASN = 11; PRO = 12; GLN = 13; ARG = 14 + SER = 15; THR = 16; VAL = 17; TRP = 18; TYR = 19 + UNK = 20 + + @classmethod + def _missing_(cls, value): + if isinstance(value, str) and len(value) == 3: # three representation + if value in non_standard_residue_substitutions: + value = non_standard_residue_substitutions[value] + if value in cls._member_names_: + return getattr(cls, value) + elif isinstance(value, str) and len(value) == 1: # one representation + if value in ressymb_to_resindex: + return cls(ressymb_to_resindex[value]) + + return super()._missing_(value) + + def __str__(self): + return self.name + + @classmethod + def is_aa(cls, value): + return (value in ressymb_to_resindex) or \ + (value in non_standard_residue_substitutions) or \ + (value in cls._member_names_) + + +num_aa_types = len(AA) + +## +# Atom identities + +class BBHeavyAtom(enum.IntEnum): + N = 0; CA = 1; C = 2; O = 3; CB = 4; OXT=14; + +max_num_heavyatoms = 15 +max_num_hydrogens = 16 +max_num_allatoms = max_num_heavyatoms + max_num_hydrogens + +restype_to_heavyatom_names = { + AA.ALA: ['N', 'CA', 'C', 'O', 'CB', '', '', '', '', '', '', '', '', '', 'OXT'], + AA.ARG: ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'NE', 'CZ', 'NH1', 'NH2', '', '', '', 'OXT'], + AA.ASN: ['N', 'CA', 'C', 'O', 'CB', 'CG', 'OD1', 'ND2', 
'', '', '', '', '', '', 'OXT'], + AA.ASP: ['N', 'CA', 'C', 'O', 'CB', 'CG', 'OD1', 'OD2', '', '', '', '', '', '', 'OXT'], + AA.CYS: ['N', 'CA', 'C', 'O', 'CB', 'SG', '', '', '', '', '', '', '', '', 'OXT'], + AA.GLN: ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'OE1', 'NE2', '', '', '', '', '', 'OXT'], + AA.GLU: ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'OE1', 'OE2', '', '', '', '', '', 'OXT'], + AA.GLY: ['N', 'CA', 'C', 'O', '', '', '', '', '', '', '', '', '', '', 'OXT'], + AA.HIS: ['N', 'CA', 'C', 'O', 'CB', 'CG', 'ND1', 'CD2', 'CE1', 'NE2', '', '', '', '', 'OXT'], + AA.ILE: ['N', 'CA', 'C', 'O', 'CB', 'CG1', 'CG2', 'CD1', '', '', '', '', '', '', 'OXT'], + AA.LEU: ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', '', '', '', '', '', '', 'OXT'], + AA.LYS: ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'CE', 'NZ', '', '', '', '', '', 'OXT'], + AA.MET: ['N', 'CA', 'C', 'O', 'CB', 'CG', 'SD', 'CE', '', '', '', '', '', '', 'OXT'], + AA.PHE: ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', '', '', '', 'OXT'], + AA.PRO: ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', '', '', '', '', '', '', '', 'OXT'], + AA.SER: ['N', 'CA', 'C', 'O', 'CB', 'OG', '', '', '', '', '', '', '', '', 'OXT'], + AA.THR: ['N', 'CA', 'C', 'O', 'CB', 'OG1', 'CG2', '', '', '', '', '', '', '', 'OXT'], + AA.TRP: ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'NE1', 'CE2', 'CE3', 'CZ2', 'CZ3', 'CH2', 'OXT'], + AA.TYR: ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', 'OH', '', '', 'OXT'], + AA.VAL: ['N', 'CA', 'C', 'O', 'CB', 'CG1', 'CG2', '', '', '', '', '', '', '', 'OXT'], + AA.UNK: ['', '', '', '', '', '', '', '', '', '', '', '', '', '', ''], +} +for names in restype_to_heavyatom_names.values(): assert len(names) == max_num_heavyatoms + +restype_to_hydrogen_names = { + AA.ALA: ['H', 'H2', 'H3', 'HA', 'HB1', 'HB2', 'HB3', 'HXT', '', '', '', '', '', '', '', ''], + AA.CYS: ['H', 'H2', 'H3', 'HA', 'HB2', 'HB3', 'HG', 'HXT', '', '', '', '', '', '', '', ''], + AA.ASP: ['H', 'H2', 'H3', 'HA', 
'HB2', 'HB3', 'HD2', 'HXT', '', '', '', '', '', '', '', ''], + AA.GLU: ['H', 'H2', 'H3', 'HA', 'HB2', 'HB3', 'HG2', 'HG3', 'HE2', 'HXT', '', '', '', '', '', ''], + AA.PHE: ['H', 'H2', 'H3', 'HA', 'HB2', 'HB3', 'HD1', 'HD2', 'HE1', 'HE2', 'HZ', 'HXT', '', '', '', ''], + AA.GLY: ['H', 'H2', 'H3', 'HA2', 'HA3', 'HXT', '', '', '', '', '', '', '', '', '', ''], + AA.HIS: ['H', 'H2', 'H3', 'HA', 'HB2', 'HB3', 'HD1', 'HD2', 'HE1', 'HE2', 'HXT', '', '', '', '', ''], + AA.ILE: ['H', 'H2', 'H3', 'HA', 'HB', 'HG12', 'HG13', 'HG21', 'HG22', 'HG23', 'HD11', 'HD12', 'HD13', 'HXT', '', ''], + AA.LYS: ['H', 'H2', 'H3', 'HA', 'HB2', 'HB3', 'HG2', 'HG3', 'HD2', 'HD3', 'HE2', 'HE3', 'HZ1', 'HZ2', 'HZ3', 'HXT'], + AA.LEU: ['H', 'H2', 'H3', 'HA', 'HB2', 'HB3', 'HG', 'HD11', 'HD12', 'HD13', 'HD21', 'HD22', 'HD23', 'HXT', '', ''], + AA.MET: ['H', 'H2', 'H3', 'HA', 'HB2', 'HB3', 'HG2', 'HG3', 'HE1', 'HE2', 'HE3', 'HXT', '', '', '', ''], + AA.ASN: ['H', 'H2', 'H3', 'HA', 'HB2', 'HB3', 'HD21', 'HD22', 'HXT', '', '', '', '', '', '', ''], + AA.PRO: ['H', 'H2', 'H3', 'HA', 'HB2', 'HB3', 'HG2', 'HG3', 'HD2', 'HD3', 'HXT', '', '', '', '', ''], + AA.GLN: ['H', 'H2', 'H3', 'HA', 'HB2', 'HB3', 'HG2', 'HG3', 'HE21', 'HE22', 'HXT', '', '', '', '', ''], + AA.ARG: ['H', 'H2', 'H3', 'HA', 'HB2', 'HB3', 'HG2', 'HG3', 'HD2', 'HD3', 'HE', 'HH11', 'HH12', 'HH21', 'HH22', 'HXT'], + AA.SER: ['H', 'H2', 'H3', 'HA', 'HB2', 'HB3', 'HG', 'HXT', '', '', '', '', '', '', '', ''], + AA.THR: ['H', 'H2', 'H3', 'HA', 'HB', 'HG1', 'HG21', 'HG22', 'HG23', 'HXT', '', '', '', '', '', ''], + AA.VAL: ['H', 'H2', 'H3', 'HA', 'HB', 'HG11', 'HG12', 'HG13', 'HG21', 'HG22', 'HG23', 'HXT', '', '', '', ''], + AA.TRP: ['H', 'H2', 'H3', 'HA', 'HB2', 'HB3', 'HD1', 'HE1', 'HE3', 'HZ2', 'HZ3', 'HH2', 'HXT', '', '', ''], + AA.TYR: ['H', 'H2', 'H3', 'HA', 'HB2', 'HB3', 'HD1', 'HD2', 'HE1', 'HE2', 'HH', 'HXT', '', '', '', ''], + AA.UNK: ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''], +} +for names in 
restype_to_hydrogen_names.values(): assert len(names) == max_num_hydrogens + +restype_to_allatom_names = { + restype: restype_to_heavyatom_names[restype] + restype_to_hydrogen_names[restype] + for restype in AA +} + +restype_atom14_name_to_index = { + resname: {name: index for index, name in enumerate(atoms) if name != ""} + for resname, atoms in restype_to_heavyatom_names.items() +} + +## +# Bond identities + +class BondType(enum.IntEnum): + NoBond = 0 + Single = 1 + Double = 2 + Triple = 3 + AromaticSingle = 5 + AromaticDouble = 6 + +BT = BondType +restype_to_bonded_atom_name_pairs = { + AA.ALA: [ + ('N', 'H', BT.Single), ('N', 'H2', BT.Single), ('N', 'H3', BT.Single), + ('N', 'CA', BT.AromaticSingle), ('CA', 'C', BT.AromaticSingle), ('CA', 'CB', BT.AromaticSingle), + ('CA', 'HA', BT.AromaticSingle), ('C', 'O', BT.AromaticDouble), ('C', 'OXT', BT.AromaticSingle), + ('CB', 'HB1', BT.AromaticSingle), ('CB', 'HB2', BT.AromaticSingle), ('CB', 'HB3', BT.AromaticSingle), + ('OXT', 'HXT', BT.AromaticSingle), ], + AA.CYS: [ + ('N', 'H', BT.Single), ('N', 'H2', BT.Single), ('N', 'H3', BT.Single), + ('N', 'CA', BT.AromaticSingle), ('CA', 'C', BT.AromaticSingle), ('CA', 'CB', BT.AromaticSingle), + ('CA', 'HA', BT.AromaticSingle), ('C', 'O', BT.AromaticDouble), ('C', 'OXT', BT.AromaticSingle), + ('CB', 'SG', BT.AromaticSingle), ('CB', 'HB2', BT.AromaticSingle), ('CB', 'HB3', BT.AromaticSingle), + ('SG', 'HG', BT.AromaticSingle), ('OXT', 'HXT', BT.AromaticSingle), ], + AA.ASP: [ + ('N', 'H', BT.Single), ('N', 'H2', BT.Single), ('N', 'H3', BT.Single), + ('N', 'CA', BT.AromaticSingle), ('CA', 'C', BT.AromaticSingle), ('CA', 'CB', BT.AromaticSingle), + ('CA', 'HA', BT.AromaticSingle), ('C', 'O', BT.AromaticDouble), ('C', 'OXT', BT.AromaticSingle), + ('CB', 'CG', BT.AromaticSingle), ('CB', 'HB2', BT.AromaticSingle), ('CB', 'HB3', BT.AromaticSingle), + ('CG', 'OD1', BT.AromaticDouble), ('CG', 'OD2', BT.AromaticSingle), ('OD2', 'HD2', BT.AromaticSingle), + ('OXT', 'HXT', 
BT.AromaticSingle), ], + AA.GLU: [ + ('N', 'H', BT.Single), ('N', 'H2', BT.Single), ('N', 'H3', BT.Single), + ('N', 'CA', BT.AromaticSingle), ('CA', 'C', BT.AromaticSingle), ('CA', 'CB', BT.AromaticSingle), + ('CA', 'HA', BT.AromaticSingle), ('C', 'O', BT.AromaticDouble), ('C', 'OXT', BT.AromaticSingle), + ('CB', 'CG', BT.AromaticSingle), ('CB', 'HB2', BT.AromaticSingle), ('CB', 'HB3', BT.AromaticSingle), + ('CG', 'CD', BT.AromaticSingle), ('CG', 'HG2', BT.AromaticSingle), ('CG', 'HG3', BT.AromaticSingle), + ('CD', 'OE1', BT.AromaticDouble), ('CD', 'OE2', BT.AromaticSingle), ('OE2', 'HE2', BT.AromaticSingle), + ('OXT', 'HXT', BT.AromaticSingle), ], + AA.PHE: [ + ('N', 'H', BT.Single), ('N', 'H2', BT.Single), ('N', 'H3', BT.Single), + ('N', 'CA', BT.AromaticSingle), ('CA', 'C', BT.AromaticSingle), ('CA', 'CB', BT.AromaticSingle), + ('CA', 'HA', BT.AromaticSingle), ('C', 'O', BT.AromaticDouble), ('C', 'OXT', BT.AromaticSingle), + ('CB', 'CG', BT.AromaticSingle), ('CB', 'HB2', BT.AromaticSingle), ('CB', 'HB3', BT.AromaticSingle), + ('CG', 'CD1', BT.AromaticDouble), ('CG', 'CD2', BT.AromaticSingle), ('CD1', 'CE1', BT.AromaticSingle), + ('CD1', 'HD1', BT.AromaticSingle), ('CD2', 'CE2', BT.AromaticDouble), ('CD2', 'HD2', BT.AromaticSingle), + ('CE1', 'CZ', BT.AromaticDouble), ('CE1', 'HE1', BT.AromaticSingle), ('CE2', 'CZ', BT.AromaticSingle), + ('CE2', 'HE2', BT.AromaticSingle), ('CZ', 'HZ', BT.AromaticSingle), ('OXT', 'HXT', BT.AromaticSingle), ], + AA.GLY: [ + ('N', 'H', BT.Single), ('N', 'H2', BT.Single), ('N', 'H3', BT.Single), + ('N', 'CA', BT.AromaticSingle), ('CA', 'C', BT.AromaticSingle), ('CA', 'HA2', BT.AromaticSingle), + ('CA', 'HA3', BT.AromaticSingle), ('C', 'O', BT.AromaticDouble), ('C', 'OXT', BT.AromaticSingle), + ('OXT', 'HXT', BT.AromaticSingle), ], + AA.HIS: [ + ('N', 'H', BT.Single), ('N', 'H2', BT.Single), ('N', 'H3', BT.Single), + ('N', 'CA', BT.AromaticSingle), ('CA', 'C', BT.AromaticSingle), ('CA', 'CB', BT.AromaticSingle), + ('CA', 'HA', 
BT.AromaticSingle), ('C', 'O', BT.AromaticDouble), ('C', 'OXT', BT.AromaticSingle), + ('CB', 'CG', BT.AromaticSingle), ('CB', 'HB2', BT.AromaticSingle), ('CB', 'HB3', BT.AromaticSingle), + ('CG', 'ND1', BT.AromaticSingle), ('CG', 'CD2', BT.AromaticDouble), ('ND1', 'CE1', BT.AromaticDouble), + ('ND1', 'HD1', BT.AromaticSingle), ('CD2', 'NE2', BT.AromaticSingle), ('CD2', 'HD2', BT.AromaticSingle), + ('CE1', 'NE2', BT.AromaticSingle), ('CE1', 'HE1', BT.AromaticSingle), ('NE2', 'HE2', BT.AromaticSingle), + ('OXT', 'HXT', BT.AromaticSingle), ], + AA.ILE: [ + ('N', 'H', BT.Single), ('N', 'H2', BT.Single), ('N', 'H3', BT.Single), + ('N', 'CA', BT.AromaticSingle), ('CA', 'C', BT.AromaticSingle), ('CA', 'CB', BT.AromaticSingle), + ('CA', 'HA', BT.AromaticSingle), ('C', 'O', BT.AromaticDouble), ('C', 'OXT', BT.AromaticSingle), + ('CB', 'CG1', BT.AromaticSingle), ('CB', 'CG2', BT.AromaticSingle), ('CB', 'HB', BT.AromaticSingle), + ('CG1', 'CD1', BT.AromaticSingle), ('CG1', 'HG12', BT.AromaticSingle), ('CG1', 'HG13', BT.AromaticSingle), + ('CG2', 'HG21', BT.AromaticSingle), ('CG2', 'HG22', BT.AromaticSingle), ('CG2', 'HG23', BT.AromaticSingle), + ('CD1', 'HD11', BT.AromaticSingle), ('CD1', 'HD12', BT.AromaticSingle), ('CD1', 'HD13', BT.AromaticSingle), + ('OXT', 'HXT', BT.AromaticSingle), ], + AA.LYS: [ + ('N', 'H', BT.Single), ('N', 'H2', BT.Single), ('N', 'H3', BT.Single), + ('N', 'CA', BT.AromaticSingle), ('CA', 'C', BT.AromaticSingle), ('CA', 'CB', BT.AromaticSingle), + ('CA', 'HA', BT.AromaticSingle), ('C', 'O', BT.AromaticDouble), ('C', 'OXT', BT.AromaticSingle), + ('CB', 'CG', BT.AromaticSingle), ('CB', 'HB2', BT.AromaticSingle), ('CB', 'HB3', BT.AromaticSingle), + ('CG', 'CD', BT.AromaticSingle), ('CG', 'HG2', BT.AromaticSingle), ('CG', 'HG3', BT.AromaticSingle), + ('CD', 'CE', BT.AromaticSingle), ('CD', 'HD2', BT.AromaticSingle), ('CD', 'HD3', BT.AromaticSingle), + ('CE', 'NZ', BT.AromaticSingle), ('CE', 'HE2', BT.AromaticSingle), ('CE', 'HE3', BT.AromaticSingle), + 
('NZ', 'HZ1', BT.AromaticSingle), ('NZ', 'HZ2', BT.AromaticSingle), ('NZ', 'HZ3', BT.AromaticSingle), + ('OXT', 'HXT', BT.AromaticSingle), ], + AA.LEU: [ + ('N', 'H', BT.Single), ('N', 'H2', BT.Single), ('N', 'H3', BT.Single), + ('N', 'CA', BT.AromaticSingle), ('CA', 'C', BT.AromaticSingle), ('CA', 'CB', BT.AromaticSingle), + ('CA', 'HA', BT.AromaticSingle), ('C', 'O', BT.AromaticDouble), ('C', 'OXT', BT.AromaticSingle), + ('CB', 'CG', BT.AromaticSingle), ('CB', 'HB2', BT.AromaticSingle), ('CB', 'HB3', BT.AromaticSingle), + ('CG', 'CD1', BT.AromaticSingle), ('CG', 'CD2', BT.AromaticSingle), ('CG', 'HG', BT.AromaticSingle), + ('CD1', 'HD11', BT.AromaticSingle), ('CD1', 'HD12', BT.AromaticSingle), ('CD1', 'HD13', BT.AromaticSingle), + ('CD2', 'HD21', BT.AromaticSingle), ('CD2', 'HD22', BT.AromaticSingle), ('CD2', 'HD23', BT.AromaticSingle), + ('OXT', 'HXT', BT.AromaticSingle), ], + AA.MET: [ + ('N', 'H', BT.Single), ('N', 'H2', BT.Single), ('N', 'H3', BT.Single), + ('N', 'CA', BT.AromaticSingle), ('CA', 'C', BT.AromaticSingle), ('CA', 'CB', BT.AromaticSingle), + ('CA', 'HA', BT.AromaticSingle), ('C', 'O', BT.AromaticDouble), ('C', 'OXT', BT.AromaticSingle), + ('CB', 'CG', BT.AromaticSingle), ('CB', 'HB2', BT.AromaticSingle), ('CB', 'HB3', BT.AromaticSingle), + ('CG', 'SD', BT.AromaticSingle), ('CG', 'HG2', BT.AromaticSingle), ('CG', 'HG3', BT.AromaticSingle), + ('SD', 'CE', BT.AromaticSingle), ('CE', 'HE1', BT.AromaticSingle), ('CE', 'HE2', BT.AromaticSingle), + ('CE', 'HE3', BT.AromaticSingle), ('OXT', 'HXT', BT.AromaticSingle), ], + AA.ASN: [ + ('N', 'H', BT.Single), ('N', 'H2', BT.Single), ('N', 'H3', BT.Single), + ('N', 'CA', BT.AromaticSingle), ('CA', 'C', BT.AromaticSingle), ('CA', 'CB', BT.AromaticSingle), + ('CA', 'HA', BT.AromaticSingle), ('C', 'O', BT.AromaticDouble), ('C', 'OXT', BT.AromaticSingle), + ('CB', 'CG', BT.AromaticSingle), ('CB', 'HB2', BT.AromaticSingle), ('CB', 'HB3', BT.AromaticSingle), + ('CG', 'OD1', BT.AromaticDouble), ('CG', 'ND2', 
BT.AromaticSingle), ('ND2', 'HD21', BT.AromaticSingle), + ('ND2', 'HD22', BT.AromaticSingle), ('OXT', 'HXT', BT.AromaticSingle), ], + AA.PRO: [ + ('N', 'H', BT.Single), ('N', 'H2', BT.Single), ('N', 'H3', BT.Single), + ('N', 'CA', BT.AromaticSingle), ('N', 'CD', BT.AromaticSingle), ('CA', 'C', BT.AromaticSingle), + ('CA', 'CB', BT.AromaticSingle), ('CA', 'HA', BT.AromaticSingle), ('C', 'O', BT.AromaticDouble), + ('C', 'OXT', BT.AromaticSingle), ('CB', 'CG', BT.AromaticSingle), ('CB', 'HB2', BT.AromaticSingle), + ('CB', 'HB3', BT.AromaticSingle), ('CG', 'CD', BT.AromaticSingle), ('CG', 'HG2', BT.AromaticSingle), + ('CG', 'HG3', BT.AromaticSingle), ('CD', 'HD2', BT.AromaticSingle), ('CD', 'HD3', BT.AromaticSingle), + ('OXT', 'HXT', BT.AromaticSingle), ], + AA.GLN: [ + ('N', 'H', BT.Single), ('N', 'H2', BT.Single), ('N', 'H3', BT.Single), + ('N', 'CA', BT.AromaticSingle), ('CA', 'C', BT.AromaticSingle), ('CA', 'CB', BT.AromaticSingle), + ('CA', 'HA', BT.AromaticSingle), ('C', 'O', BT.AromaticDouble), ('C', 'OXT', BT.AromaticSingle), + ('CB', 'CG', BT.AromaticSingle), ('CB', 'HB2', BT.AromaticSingle), ('CB', 'HB3', BT.AromaticSingle), + ('CG', 'CD', BT.AromaticSingle), ('CG', 'HG2', BT.AromaticSingle), ('CG', 'HG3', BT.AromaticSingle), + ('CD', 'OE1', BT.AromaticDouble), ('CD', 'NE2', BT.AromaticSingle), ('NE2', 'HE21', BT.AromaticSingle), + ('NE2', 'HE22', BT.AromaticSingle), ('OXT', 'HXT', BT.AromaticSingle), ], + AA.ARG: [ + ('N', 'H', BT.Single), ('N', 'H2', BT.Single), ('N', 'H3', BT.Single), + ('N', 'CA', BT.AromaticSingle), ('CA', 'C', BT.AromaticSingle), ('CA', 'CB', BT.AromaticSingle), + ('CA', 'HA', BT.AromaticSingle), ('C', 'O', BT.AromaticDouble), ('C', 'OXT', BT.AromaticSingle), + ('CB', 'CG', BT.AromaticSingle), ('CB', 'HB2', BT.AromaticSingle), ('CB', 'HB3', BT.AromaticSingle), + ('CG', 'CD', BT.AromaticSingle), ('CG', 'HG2', BT.AromaticSingle), ('CG', 'HG3', BT.AromaticSingle), + ('CD', 'NE', BT.AromaticSingle), ('CD', 'HD2', BT.AromaticSingle), ('CD', 
'HD3', BT.AromaticSingle), + ('NE', 'CZ', BT.AromaticSingle), ('NE', 'HE', BT.AromaticSingle), ('CZ', 'NH1', BT.AromaticSingle), + ('CZ', 'NH2', BT.AromaticDouble), ('NH1', 'HH11', BT.AromaticSingle), ('NH1', 'HH12', BT.AromaticSingle), + ('NH2', 'HH21', BT.AromaticSingle), ('NH2', 'HH22', BT.AromaticSingle), ('OXT', 'HXT', BT.AromaticSingle), ], + AA.SER: [ + ('N', 'H', BT.Single), ('N', 'H2', BT.Single), ('N', 'H3', BT.Single), + ('N', 'CA', BT.AromaticSingle), ('CA', 'C', BT.AromaticSingle), ('CA', 'CB', BT.AromaticSingle), + ('CA', 'HA', BT.AromaticSingle), ('C', 'O', BT.AromaticDouble), ('C', 'OXT', BT.AromaticSingle), + ('CB', 'OG', BT.AromaticSingle), ('CB', 'HB2', BT.AromaticSingle), ('CB', 'HB3', BT.AromaticSingle), + ('OG', 'HG', BT.AromaticSingle), ('OXT', 'HXT', BT.AromaticSingle), ], + AA.THR: [ + ('N', 'H', BT.Single), ('N', 'H2', BT.Single), ('N', 'H3', BT.Single), + ('N', 'CA', BT.AromaticSingle), ('CA', 'C', BT.AromaticSingle), ('CA', 'CB', BT.AromaticSingle), + ('CA', 'HA', BT.AromaticSingle), ('C', 'O', BT.AromaticDouble), ('C', 'OXT', BT.AromaticSingle), + ('CB', 'OG1', BT.AromaticSingle), ('CB', 'CG2', BT.AromaticSingle), ('CB', 'HB', BT.AromaticSingle), + ('OG1', 'HG1', BT.AromaticSingle), ('CG2', 'HG21', BT.AromaticSingle), ('CG2', 'HG22', BT.AromaticSingle), + ('CG2', 'HG23', BT.AromaticSingle), ('OXT', 'HXT', BT.AromaticSingle), ], + AA.VAL: [ + ('N', 'H', BT.Single), ('N', 'H2', BT.Single), ('N', 'H3', BT.Single), + ('N', 'CA', BT.AromaticSingle), ('CA', 'C', BT.AromaticSingle), ('CA', 'CB', BT.AromaticSingle), + ('CA', 'HA', BT.AromaticSingle), ('C', 'O', BT.AromaticDouble), ('C', 'OXT', BT.AromaticSingle), + ('CB', 'CG1', BT.AromaticSingle), ('CB', 'CG2', BT.AromaticSingle), ('CB', 'HB', BT.AromaticSingle), + ('CG1', 'HG11', BT.AromaticSingle), ('CG1', 'HG12', BT.AromaticSingle), ('CG1', 'HG13', BT.AromaticSingle), + ('CG2', 'HG21', BT.AromaticSingle), ('CG2', 'HG22', BT.AromaticSingle), ('CG2', 'HG23', BT.AromaticSingle), + ('OXT', 
'HXT', BT.AromaticSingle), ], + AA.TRP: [ + ('N', 'H', BT.Single), ('N', 'H2', BT.Single), ('N', 'H3', BT.Single), + ('N', 'CA', BT.AromaticSingle), ('CA', 'C', BT.AromaticSingle), ('CA', 'CB', BT.AromaticSingle), + ('CA', 'HA', BT.AromaticSingle), ('C', 'O', BT.AromaticDouble), ('C', 'OXT', BT.AromaticSingle), + ('CB', 'CG', BT.AromaticSingle), ('CB', 'HB2', BT.AromaticSingle), ('CB', 'HB3', BT.AromaticSingle), + ('CG', 'CD1', BT.AromaticDouble), ('CG', 'CD2', BT.AromaticSingle), ('CD1', 'NE1', BT.AromaticSingle), + ('CD1', 'HD1', BT.AromaticSingle), ('CD2', 'CE2', BT.AromaticDouble), ('CD2', 'CE3', BT.AromaticSingle), + ('NE1', 'CE2', BT.AromaticSingle), ('NE1', 'HE1', BT.AromaticSingle), ('CE2', 'CZ2', BT.AromaticSingle), + ('CE3', 'CZ3', BT.AromaticDouble), ('CE3', 'HE3', BT.AromaticSingle), ('CZ2', 'CH2', BT.AromaticDouble), + ('CZ2', 'HZ2', BT.AromaticSingle), ('CZ3', 'CH2', BT.AromaticSingle), ('CZ3', 'HZ3', BT.AromaticSingle), + ('CH2', 'HH2', BT.AromaticSingle), ('OXT', 'HXT', BT.AromaticSingle), ], + AA.TYR: [ + ('N', 'H', BT.Single), ('N', 'H2', BT.Single), ('N', 'H3', BT.Single), + ('N', 'CA', BT.AromaticSingle), ('CA', 'C', BT.AromaticSingle), ('CA', 'CB', BT.AromaticSingle), + ('CA', 'HA', BT.AromaticSingle), ('C', 'O', BT.AromaticDouble), ('C', 'OXT', BT.AromaticSingle), + ('CB', 'CG', BT.AromaticSingle), ('CB', 'HB2', BT.AromaticSingle), ('CB', 'HB3', BT.AromaticSingle), + ('CG', 'CD1', BT.AromaticDouble), ('CG', 'CD2', BT.AromaticSingle), ('CD1', 'CE1', BT.AromaticSingle), + ('CD1', 'HD1', BT.AromaticSingle), ('CD2', 'CE2', BT.AromaticDouble), ('CD2', 'HD2', BT.AromaticSingle), + ('CE1', 'CZ', BT.AromaticDouble), ('CE1', 'HE1', BT.AromaticSingle), ('CE2', 'CZ', BT.AromaticSingle), + ('CE2', 'HE2', BT.AromaticSingle), ('CZ', 'OH', BT.AromaticSingle), ('OH', 'HH', BT.AromaticSingle), + ('OXT', 'HXT', BT.AromaticSingle), ], + AA.UNK: [], +} + + +restype_to_allatom_bond_matrix = { + restype: torch.zeros([max_num_allatoms, max_num_allatoms], 
class Torsion(enum.IntEnum):
    """Index of each per-residue rigid-group (torsion) frame.

    The integer value is used as the second index into the
    ``restype_rigid_group_rotation`` / ``restype_rigid_group_translation``
    tables and as the group id stored in ``rigid_group_heavy_atom_positions``.
    Side-chain frames are addressed arithmetically as ``Torsion.Chi1 + k``
    for ``k`` in ``0..3``, so the four chi members must stay contiguous.
    """
    Backbone = 0
    Omega = 1
    Phi = 2
    Psi = 3
    Chi1 = 4
    Chi2 = 5
    Chi3 = 6
    # Fourth side-chain torsion (``Torsion.Chi1 + 3``).
    Chi4 = 7
    # Backward-compatible alias: this member was previously (mis)named
    # ``Chi7``.  Because it shares the value 7 it resolves to ``Chi4``, so
    # existing ``Torsion.Chi7`` references keep working.
    Chi7 = 7
+ AA.ARG: [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'], + ['CB', 'CG', 'CD', 'NE'], ['CG', 'CD', 'NE', 'CZ']], + AA.ASN: [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'OD1']], + AA.ASP: [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'OD1']], + AA.CYS: [['N', 'CA', 'CB', 'SG']], + AA.GLN: [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'], + ['CB', 'CG', 'CD', 'OE1']], + AA.GLU: [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'], + ['CB', 'CG', 'CD', 'OE1']], + AA.GLY: [], + AA.HIS: [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'ND1']], + AA.ILE: [['N', 'CA', 'CB', 'CG1'], ['CA', 'CB', 'CG1', 'CD1']], + AA.LEU: [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']], + AA.LYS: [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'], + ['CB', 'CG', 'CD', 'CE'], ['CG', 'CD', 'CE', 'NZ']], + AA.MET: [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'SD'], + ['CB', 'CG', 'SD', 'CE']], + AA.PHE: [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']], + AA.PRO: [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD']], + AA.SER: [['N', 'CA', 'CB', 'OG']], + AA.THR: [['N', 'CA', 'CB', 'OG1']], + AA.TRP: [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']], + AA.TYR: [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']], + AA.VAL: [['N', 'CA', 'CB', 'CG1']], +} + + +chi_angles_mask = { + AA.ALA: [False, False, False, False], # ALA + AA.ARG: [True , True , True , True ], # ARG + AA.ASN: [True , True , False, False], # ASN + AA.ASP: [True , True , False, False], # ASP + AA.CYS: [True , False, False, False], # CYS + AA.GLN: [True , True , True , False], # GLN + AA.GLU: [True , True , True , False], # GLU + AA.GLY: [False, False, False, False], # GLY + AA.HIS: [True , True , False, False], # HIS + AA.ILE: [True , True , False, False], # ILE + AA.LEU: [True , True , False, False], # LEU + AA.LYS: [True , True , True , True ], # LYS + AA.MET: [True , True , True , False], # MET + AA.PHE: [True , True , False, False], # PHE + AA.PRO: [True , True , False, False], # PRO + AA.SER: [True , False, False, 
False], # SER + AA.THR: [True , False, False, False], # THR + AA.TRP: [True , True , False, False], # TRP + AA.TYR: [True , True , False, False], # TYR + AA.VAL: [True , False, False, False], # VAL + AA.UNK: [False, False, False, False], # UNK +} + + +chi_pi_periodic = { + AA.ALA: [False, False, False, False], # ALA + AA.ARG: [False, False, False, False], # ARG + AA.ASN: [False, False, False, False], # ASN + AA.ASP: [False, True , False, False], # ASP + AA.CYS: [False, False, False, False], # CYS + AA.GLN: [False, False, False, False], # GLN + AA.GLU: [False, False, True , False], # GLU + AA.GLY: [False, False, False, False], # GLY + AA.HIS: [False, False, False, False], # HIS + AA.ILE: [False, False, False, False], # ILE + AA.LEU: [False, False, False, False], # LEU + AA.LYS: [False, False, False, False], # LYS + AA.MET: [False, False, False, False], # MET + AA.PHE: [False, True , False, False], # PHE + AA.PRO: [False, False, False, False], # PRO + AA.SER: [False, False, False, False], # SER + AA.THR: [False, False, False, False], # THR + AA.TRP: [False, False, False, False], # TRP + AA.TYR: [False, True , False, False], # TYR + AA.VAL: [False, False, False, False], # VAL + AA.UNK: [False, False, False, False], # UNK +} + + +rigid_group_heavy_atom_positions = { + AA.ALA: [ + ['N', 0, (-0.525, 1.363, 0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.526, -0.000, -0.000)], + ['CB', 0, (-0.529, -0.774, -1.205)], + ['O', 3, (0.627, 1.062, 0.000)], + ], + AA.ARG: [ + ['N', 0, (-0.524, 1.362, -0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.525, -0.000, -0.000)], + ['CB', 0, (-0.524, -0.778, -1.209)], + ['O', 3, (0.626, 1.062, 0.000)], + ['CG', 4, (0.616, 1.390, -0.000)], + ['CD', 5, (0.564, 1.414, 0.000)], + ['NE', 6, (0.539, 1.357, -0.000)], + ['NH1', 7, (0.206, 2.301, 0.000)], + ['NH2', 7, (2.078, 0.978, -0.000)], + ['CZ', 7, (0.758, 1.093, -0.000)], + ], + AA.ASN: [ + ['N', 0, (-0.536, 1.357, 0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 
0, (1.526, -0.000, -0.000)], + ['CB', 0, (-0.531, -0.787, -1.200)], + ['O', 3, (0.625, 1.062, 0.000)], + ['CG', 4, (0.584, 1.399, 0.000)], + ['ND2', 5, (0.593, -1.188, 0.001)], + ['OD1', 5, (0.633, 1.059, 0.000)], + ], + AA.ASP: [ + ['N', 0, (-0.525, 1.362, -0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.527, 0.000, -0.000)], + ['CB', 0, (-0.526, -0.778, -1.208)], + ['O', 3, (0.626, 1.062, -0.000)], + ['CG', 4, (0.593, 1.398, -0.000)], + ['OD1', 5, (0.610, 1.091, 0.000)], + ['OD2', 5, (0.592, -1.101, -0.003)], + ], + AA.CYS: [ + ['N', 0, (-0.522, 1.362, -0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.524, 0.000, 0.000)], + ['CB', 0, (-0.519, -0.773, -1.212)], + ['O', 3, (0.625, 1.062, -0.000)], + ['SG', 4, (0.728, 1.653, 0.000)], + ], + AA.GLN: [ + ['N', 0, (-0.526, 1.361, -0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.526, 0.000, 0.000)], + ['CB', 0, (-0.525, -0.779, -1.207)], + ['O', 3, (0.626, 1.062, -0.000)], + ['CG', 4, (0.615, 1.393, 0.000)], + ['CD', 5, (0.587, 1.399, -0.000)], + ['NE2', 6, (0.593, -1.189, -0.001)], + ['OE1', 6, (0.634, 1.060, 0.000)], + ], + AA.GLU: [ + ['N', 0, (-0.528, 1.361, 0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.526, -0.000, -0.000)], + ['CB', 0, (-0.526, -0.781, -1.207)], + ['O', 3, (0.626, 1.062, 0.000)], + ['CG', 4, (0.615, 1.392, 0.000)], + ['CD', 5, (0.600, 1.397, 0.000)], + ['OE1', 6, (0.607, 1.095, -0.000)], + ['OE2', 6, (0.589, -1.104, -0.001)], + ], + AA.GLY: [ + ['N', 0, (-0.572, 1.337, 0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.517, -0.000, -0.000)], + ['O', 3, (0.626, 1.062, -0.000)], + ], + AA.HIS: [ + ['N', 0, (-0.527, 1.360, 0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.525, 0.000, 0.000)], + ['CB', 0, (-0.525, -0.778, -1.208)], + ['O', 3, (0.625, 1.063, 0.000)], + ['CG', 4, (0.600, 1.370, -0.000)], + ['CD2', 5, (0.889, -1.021, 0.003)], + ['ND1', 5, (0.744, 1.160, -0.000)], + ['CE1', 5, (2.030, 0.851, 0.002)], + ['NE2', 5, (2.145, 
-0.466, 0.004)], + ], + AA.ILE: [ + ['N', 0, (-0.493, 1.373, -0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.527, -0.000, -0.000)], + ['CB', 0, (-0.536, -0.793, -1.213)], + ['O', 3, (0.627, 1.062, -0.000)], + ['CG1', 4, (0.534, 1.437, -0.000)], + ['CG2', 4, (0.540, -0.785, -1.199)], + ['CD1', 5, (0.619, 1.391, 0.000)], + ], + AA.LEU: [ + ['N', 0, (-0.520, 1.363, 0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.525, -0.000, -0.000)], + ['CB', 0, (-0.522, -0.773, -1.214)], + ['O', 3, (0.625, 1.063, -0.000)], + ['CG', 4, (0.678, 1.371, 0.000)], + ['CD1', 5, (0.530, 1.430, -0.000)], + ['CD2', 5, (0.535, -0.774, 1.200)], + ], + AA.LYS: [ + ['N', 0, (-0.526, 1.362, -0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.526, 0.000, 0.000)], + ['CB', 0, (-0.524, -0.778, -1.208)], + ['O', 3, (0.626, 1.062, -0.000)], + ['CG', 4, (0.619, 1.390, 0.000)], + ['CD', 5, (0.559, 1.417, 0.000)], + ['CE', 6, (0.560, 1.416, 0.000)], + ['NZ', 7, (0.554, 1.387, 0.000)], + ], + AA.MET: [ + ['N', 0, (-0.521, 1.364, -0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.525, 0.000, 0.000)], + ['CB', 0, (-0.523, -0.776, -1.210)], + ['O', 3, (0.625, 1.062, -0.000)], + ['CG', 4, (0.613, 1.391, -0.000)], + ['SD', 5, (0.703, 1.695, 0.000)], + ['CE', 6, (0.320, 1.786, -0.000)], + ], + AA.PHE: [ + ['N', 0, (-0.518, 1.363, 0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.524, 0.000, -0.000)], + ['CB', 0, (-0.525, -0.776, -1.212)], + ['O', 3, (0.626, 1.062, -0.000)], + ['CG', 4, (0.607, 1.377, 0.000)], + ['CD1', 5, (0.709, 1.195, -0.000)], + ['CD2', 5, (0.706, -1.196, 0.000)], + ['CE1', 5, (2.102, 1.198, -0.000)], + ['CE2', 5, (2.098, -1.201, -0.000)], + ['CZ', 5, (2.794, -0.003, -0.001)], + ], + AA.PRO: [ + ['N', 0, (-0.566, 1.351, -0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.527, -0.000, 0.000)], + ['CB', 0, (-0.546, -0.611, -1.293)], + ['O', 3, (0.621, 1.066, 0.000)], + ['CG', 4, (0.382, 1.445, 0.0)], + # ['CD', 5, (0.427, 1.440, 0.0)], + 
['CD', 5, (0.477, 1.424, 0.0)], # manually made angle 2 degrees larger + ], + AA.SER: [ + ['N', 0, (-0.529, 1.360, -0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.525, -0.000, -0.000)], + ['CB', 0, (-0.518, -0.777, -1.211)], + ['O', 3, (0.626, 1.062, -0.000)], + ['OG', 4, (0.503, 1.325, 0.000)], + ], + AA.THR: [ + ['N', 0, (-0.517, 1.364, 0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.526, 0.000, -0.000)], + ['CB', 0, (-0.516, -0.793, -1.215)], + ['O', 3, (0.626, 1.062, 0.000)], + ['CG2', 4, (0.550, -0.718, -1.228)], + ['OG1', 4, (0.472, 1.353, 0.000)], + ], + AA.TRP: [ + ['N', 0, (-0.521, 1.363, 0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.525, -0.000, 0.000)], + ['CB', 0, (-0.523, -0.776, -1.212)], + ['O', 3, (0.627, 1.062, 0.000)], + ['CG', 4, (0.609, 1.370, -0.000)], + ['CD1', 5, (0.824, 1.091, 0.000)], + ['CD2', 5, (0.854, -1.148, -0.005)], + ['CE2', 5, (2.186, -0.678, -0.007)], + ['CE3', 5, (0.622, -2.530, -0.007)], + ['NE1', 5, (2.140, 0.690, -0.004)], + ['CH2', 5, (3.028, -2.890, -0.013)], + ['CZ2', 5, (3.283, -1.543, -0.011)], + ['CZ3', 5, (1.715, -3.389, -0.011)], + ], + AA.TYR: [ + ['N', 0, (-0.522, 1.362, 0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.524, -0.000, -0.000)], + ['CB', 0, (-0.522, -0.776, -1.213)], + ['O', 3, (0.627, 1.062, -0.000)], + ['CG', 4, (0.607, 1.382, -0.000)], + ['CD1', 5, (0.716, 1.195, -0.000)], + ['CD2', 5, (0.713, -1.194, -0.001)], + ['CE1', 5, (2.107, 1.200, -0.002)], + ['CE2', 5, (2.104, -1.201, -0.003)], + ['OH', 5, (4.168, -0.002, -0.005)], + ['CZ', 5, (2.791, -0.001, -0.003)], + ], + AA.VAL: [ + ['N', 0, (-0.494, 1.373, -0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.527, -0.000, -0.000)], + ['CB', 0, (-0.533, -0.795, -1.213)], + ['O', 3, (0.627, 1.062, -0.000)], + ['CG1', 4, (0.540, 1.429, -0.000)], + ['CG2', 4, (0.533, -0.776, 1.203)], + ], +} + + +# The following tensors are initialized by `_make_rigid_group_constants` +restype_rigid_group_rotation = 
def _make_rigid_group_constants():
    """Fill the module-level rigid-group tables from the ideal-geometry data.

    Writes, in place, into ``restype_rigid_group_rotation``,
    ``restype_rigid_group_translation``, ``restype_heavyatom_to_rigid_group``
    and ``restype_heavyatom_rigid_group_positions`` for every residue type in
    ``AA`` except ``AA.UNK``, whose rows keep their initial zeros.  The input
    is ``rigid_group_heavy_atom_positions`` (``[atom_name, group, (x, y, z)]``
    per atom) together with ``chi_angles_mask`` and ``chi_angles_atoms``.
    """

    def _make_rotation_matrix(ex, ey):
        """Return a 3x3 matrix whose columns are an orthonormal basis.

        The first column is ``ex`` normalized, the second is the component of
        ``ey`` orthogonal to ``ex`` (normalized), the third is their cross
        product.
        """
        ex_normalized = ex / torch.linalg.norm(ex)

        # make ey perpendicular to ex
        ey_normalized = ey - torch.dot(ey, ex_normalized) * ex_normalized
        ey_normalized /= torch.linalg.norm(ey_normalized)

        # `dim` is given explicitly: calling torch.cross without it is
        # deprecated and emits a warning every time this module is imported.
        eznorm = torch.cross(ex_normalized, ey_normalized, dim=-1)
        m = torch.stack([ex_normalized, ey_normalized, eznorm]).transpose(0, 1)    # (3, 3_index)
        return m

    for restype in AA:
        if restype == AA.UNK:
            continue

        atom_groups = {
            name: group
            for name, group, _ in rigid_group_heavy_atom_positions[restype]
        }
        atom_positions = {
            name: torch.FloatTensor(pos)
            for name, _, pos in rigid_group_heavy_atom_positions[restype]
        }

        # Atom 14 rigid group positions
        for atom_idx, atom_name in enumerate(restype_to_heavyatom_names[restype]):
            # Skip padding slots and atoms without ideal coordinates.
            if (atom_name == '') or (atom_name not in atom_groups):
                continue
            restype_heavyatom_to_rigid_group[restype, atom_idx] = atom_groups[atom_name]
            restype_heavyatom_rigid_group_positions[restype, atom_idx, :] = atom_positions[atom_name]

        # 0: backbone to backbone
        restype_rigid_group_rotation[restype, Torsion.Backbone, :, :] = torch.eye(3)
        restype_rigid_group_translation[restype, Torsion.Backbone, :] = torch.zeros([3])

        # 1: omega-frame to backbone
        restype_rigid_group_rotation[restype, Torsion.Omega, :, :] = torch.eye(3)
        restype_rigid_group_translation[restype, Torsion.Omega, :] = torch.zeros([3])

        # 2: phi-frame to backbone
        restype_rigid_group_rotation[restype, Torsion.Phi, :, :] = _make_rotation_matrix(
            ex=atom_positions['N'] - atom_positions['CA'],
            ey=torch.FloatTensor([1., 0., 0.]),
        )
        restype_rigid_group_translation[restype, Torsion.Phi, :] = atom_positions['N']

        # 3: psi-frame to backbone
        restype_rigid_group_rotation[restype, Torsion.Psi, :, :] = _make_rotation_matrix(
            ex=atom_positions['C'] - atom_positions['CA'],
            ey=atom_positions['CA'] - atom_positions['N'],    # In accordance to the definition of psi angle
        )
        restype_rigid_group_translation[restype, Torsion.Psi, :] = atom_positions['C']

        # 4: chi1-frame to backbone
        if chi_angles_mask[restype][0]:
            base_atom_names = chi_angles_atoms[restype][0]
            base_atom_positions = [atom_positions[name] for name in base_atom_names]
            restype_rigid_group_rotation[restype, Torsion.Chi1, :, :] = _make_rotation_matrix(
                ex=base_atom_positions[2] - base_atom_positions[1],
                ey=base_atom_positions[0] - base_atom_positions[1],
            )
            restype_rigid_group_translation[restype, Torsion.Chi1, :] = base_atom_positions[2]

        # chi2-chi1
        # chi3-chi2
        # chi4-chi3
        # NOTE(review): the axis-end atom position is used directly as `ex`,
        # which presumes it is expressed in the previous chi frame -- confirm
        # against the source of rigid_group_heavy_atom_positions.
        for chi_idx in range(1, 4):
            if chi_angles_mask[restype][chi_idx]:
                axis_end_atom_name = chi_angles_atoms[restype][chi_idx][2]
                axis_end_atom_position = atom_positions[axis_end_atom_name]
                restype_rigid_group_rotation[restype, Torsion.Chi1 + chi_idx, :, :] = _make_rotation_matrix(
                    ex=axis_end_atom_position,
                    ey=torch.FloatTensor([-1., 0., 0.]),
                )
                restype_rigid_group_translation[restype, Torsion.Chi1 + chi_idx, :] = axis_end_atom_position
+ (-0.528, 1.361, 0.0), # N + (0.0, 0.0, 0.0), # CA + (1.526, -0.0, -0.0), # C + ], + AA.GLY: [ + (-0.572, 1.337, 0.0), # N + (0.0, 0.0, 0.0), # CA + (1.517, -0.0, -0.0), # C + ], + AA.HIS: [ + (-0.527, 1.36, 0.0), # N + (0.0, 0.0, 0.0), # CA + (1.525, 0.0, 0.0), # C + ], + AA.ILE: [ + (-0.493, 1.373, -0.0), # N + (0.0, 0.0, 0.0), # CA + (1.527, -0.0, -0.0), # C + ], + AA.LEU: [ + (-0.52, 1.363, 0.0), # N + (0.0, 0.0, 0.0), # CA + (1.525, -0.0, -0.0), # C + ], + AA.LYS: [ + (-0.526, 1.362, -0.0), # N + (0.0, 0.0, 0.0), # CA + (1.526, 0.0, 0.0), # C + ], + AA.MET: [ + (-0.521, 1.364, -0.0), # N + (0.0, 0.0, 0.0), # CA + (1.525, 0.0, 0.0), # C + ], + AA.PHE: [ + (-0.518, 1.363, 0.0), # N + (0.0, 0.0, 0.0), # CA + (1.524, 0.0, -0.0), # C + ], + AA.PRO: [ + (-0.566, 1.351, -0.0), # N + (0.0, 0.0, 0.0), # CA + (1.527, -0.0, 0.0), # C + ], + AA.SER: [ + (-0.529, 1.36, -0.0), # N + (0.0, 0.0, 0.0), # CA + (1.525, -0.0, -0.0), # C + ], + AA.THR: [ + (-0.517, 1.364, 0.0), # N + (0.0, 0.0, 0.0), # CA + (1.526, 0.0, -0.0), # C + ], + AA.TRP: [ + (-0.521, 1.363, 0.0), # N + (0.0, 0.0, 0.0), # CA + (1.525, -0.0, 0.0), # C + ], + AA.TYR: [ + (-0.522, 1.362, 0.0), # N + (0.0, 0.0, 0.0), # CA + (1.524, -0.0, -0.0), # C + ], + AA.VAL: [ + (-0.494, 1.373, -0.0), # N + (0.0, 0.0, 0.0), # CA + (1.527, -0.0, -0.0), # C + ], +} + +bb_oxygen_coordinate = { + AA.ALA: (2.153, -1.062, 0.0), + AA.ARG: (2.151, -1.062, 0.0), + AA.ASN: (2.151, -1.062, 0.0), + AA.ASP: (2.153, -1.062, 0.0), + AA.CYS: (2.149, -1.062, 0.0), + AA.GLN: (2.152, -1.062, 0.0), + AA.GLU: (2.152, -1.062, 0.0), + AA.GLY: (2.143, -1.062, 0.0), + AA.HIS: (2.15, -1.063, 0.0), + AA.ILE: (2.154, -1.062, 0.0), + AA.LEU: (2.15, -1.063, 0.0), + AA.LYS: (2.152, -1.062, 0.0), + AA.MET: (2.15, -1.062, 0.0), + AA.PHE: (2.15, -1.062, 0.0), + AA.PRO: (2.148, -1.066, 0.0), + AA.SER: (2.151, -1.062, 0.0), + AA.THR: (2.152, -1.062, 0.0), + AA.TRP: (2.152, -1.062, 0.0), + AA.TYR: (2.151, -1.062, 0.0), + AA.VAL: (2.154, -1.062, 0.0), +} + 
def make_coordinate_tensors():
    """Copy the backbone coordinate dictionaries into the module-level tensors.

    For every residue type in ``backbone_atom_coordinates`` the listed
    coordinates are written row by row into
    ``backbone_atom_coordinates_tensor``; for every residue type in
    ``bb_oxygen_coordinate`` the single coordinate is written into
    ``bb_oxygen_coordinate_tensor``.  Residue types absent from the
    dictionaries keep their zero rows.
    """
    for aa_type in backbone_atom_coordinates:
        # Indexing by residue type yields a view, so the row writes below land
        # in the module-level tensor.
        residue_rows = backbone_atom_coordinates_tensor[aa_type]
        for row, xyz in enumerate(backbone_atom_coordinates[aa_type]):
            residue_rows[row] = torch.FloatTensor(xyz)

    for aa_type in bb_oxygen_coordinate:
        oxygen_xyz = bb_oxygen_coordinate[aa_type]
        bb_oxygen_coordinate_tensor[aa_type] = torch.FloatTensor(oxygen_xyz)
+ Parameters + ---------- + handle : file or list + the open DSSP output file handle + or the list of lines of the DSSP output file + """ + dssp = OrderedDict() + start = 0 + for l in handle: + sl = l.split() + if len(sl) < 2: + continue + if sl[1] == "RESIDUE": + # Start parsing from here + start = 1 + continue + if not start: + continue + if l[9] == " ": + # Skip -- missing residue + continue + + dssp_index = int(l[:5]) + resseq = int(l[5:10]) + icode = l[10] + chainid = l[11] + aa = l[13] + ss = l[16] + if ss == " ": + ss = "-" + try: + # NH_O_1_relidx = int(l[38:45]) + # NH_O_1_energy = float(l[46:50]) + # O_NH_1_relidx = int(l[50:56]) + # O_NH_1_energy = float(l[57:61]) + # NH_O_2_relidx = int(l[61:67]) + # NH_O_2_energy = float(l[68:72]) + # O_NH_2_relidx = int(l[72:78]) + # O_NH_2_energy = float(l[79:83]) + + acc = int(l[34:38]) + phi = float(l[103:109]) + psi = float(l[109:115]) + except ValueError as exc: + # DSSP output breaks its own format when there are >9999 + # residues, since only 4 digits are allocated to the seq num + # field. See 3kic chain T res 321, 1vsy chain T res 6077. + # Here, look for whitespace to figure out the number of extra + # digits, and shift parsing the rest of the line by that amount. 
def find_sstruct_ranges(chain_dict, min_length=5):
    """Return index ranges of contiguous non-loop secondary structure.

    Parameters
    ----------
    chain_dict : mapping
        Ordered mapping of residue key -> record with an ``ss`` attribute
        (one chain of the dictionary built by ``make_dssp_dict``).
    min_length : int
        Minimum number of consecutive residues for a run to be reported.

    Returns
    -------
    list of (start, end) tuples
        Inclusive positions, in iteration order of ``chain_dict``, of every
        run of residues whose secondary-structure index is in
        ``NONLOOP_SSTRUCT_INDEX`` and whose length is at least ``min_length``.
    """
    sstruct_ranges = []
    start, end = None, None
    for i, item in enumerate(chain_dict.values()):
        if secondary_struct_symbol_to_index(item.ss) in NONLOOP_SSTRUCT_INDEX:
            if start is None:
                start = i
            end = i
        elif start is not None:
            # A loop residue closes the currently open run.
            if (end - start + 1) >= min_length:
                sstruct_ranges.append((start, end))
            start, end = None, None

    # A run that extends to the last residue is never closed by a loop
    # residue; without this flush it was silently dropped.
    if start is not None and (end - start + 1) >= min_length:
        sstruct_ranges.append((start, end))
    return sstruct_ranges
range(end_l+1, start_r)] + fragments_all.append(loop_reskeys) + return fragments_all diff --git a/pepflow/modules/protein/parsers.py b/pepflow/modules/protein/parsers.py new file mode 100644 index 0000000000000000000000000000000000000000..b290160830f286d6397e4bd2a2e32eb93fb3fd9c --- /dev/null +++ b/pepflow/modules/protein/parsers.py @@ -0,0 +1,193 @@ +import math +import torch +from Bio import PDB +from Bio.PDB import Selection +from Bio.PDB.Residue import Residue +from Bio.PDB.PDBParser import PDBParser +from Bio.PDB.MMCIFParser import MMCIFParser +from easydict import EasyDict + +from pepflow.modules.protein.constants import (AA, max_num_heavyatoms, max_num_hydrogens, + restype_to_heavyatom_names, + restype_to_hydrogen_names, + BBHeavyAtom, non_standard_residue_substitutions) + +from Bio.PDB import PDBParser +from Bio.SeqUtils import seq1 + + +def _get_residue_heavyatom_info(res: Residue): + pos_heavyatom = torch.zeros([max_num_heavyatoms, 3], dtype=torch.float) + mask_heavyatom = torch.zeros([max_num_heavyatoms, ], dtype=torch.bool) + bfactor_heavyatom = torch.zeros([max_num_heavyatoms, ], dtype=torch.float) + restype = AA(res.get_resname()) + for idx, atom_name in enumerate(restype_to_heavyatom_names[restype]): + if atom_name == '': continue + if atom_name in res: + pos_heavyatom[idx] = torch.tensor(res[atom_name].get_coord().tolist(), dtype=pos_heavyatom.dtype) + mask_heavyatom[idx] = True + bfactor_heavyatom[idx] = res[atom_name].get_bfactor() + return pos_heavyatom, mask_heavyatom, bfactor_heavyatom + + +def _get_residue_hydrogen_info(res: Residue): + pos_hydrogen = torch.zeros([max_num_hydrogens, 3], dtype=torch.float) + mask_hydrogen = torch.zeros([max_num_hydrogens, ], dtype=torch.bool) + restype = AA(res.get_resname()) + + for idx, atom_name in enumerate(restype_to_hydrogen_names[restype]): + if atom_name == '': continue + if atom_name in res: + pos_hydrogen[idx] = torch.tensor(res[atom_name].get_coord().tolist(), dtype=pos_hydrogen.dtype) + 
mask_hydrogen[idx] = True + + return pos_hydrogen, mask_hydrogen + + +def parse_pdb(path, model_id=0, unknown_threshold=1.0): + parser = PDBParser() + structure = parser.get_structure(None, path) + return parse_biopython_structure(structure[model_id], unknown_threshold=unknown_threshold) + + +def parse_mmcif_assembly(path, model_id, assembly_id=0, unknown_threshold=1.0): + parser = MMCIFParser() + structure = parser.get_structure(None, path) + mmcif_dict = parser._mmcif_dict + if '_pdbx_struct_assembly_gen.asym_id_list' not in mmcif_dict: + return parse_biopython_structure(structure[model_id], unknown_threshold=unknown_threshold) + else: + assemblies = [tuple(chains.split(',')) for chains in mmcif_dict['_pdbx_struct_assembly_gen.asym_id_list']] + label_to_auth = {} + for label_asym_id, auth_asym_id in zip(mmcif_dict['_atom_site.label_asym_id'], mmcif_dict['_atom_site.auth_asym_id']): + label_to_auth[label_asym_id] = auth_asym_id + model_real = list({structure[model_id][label_to_auth[ch]] for ch in assemblies[assembly_id]}) + return parse_biopython_structure(model_real) + + +def parse_biopython_structure(entity, unknown_threshold=1.0): + chains = Selection.unfold_entities(entity, 'C') + chains.sort(key=lambda c: c.get_id()) + data = EasyDict({ + 'chain_id': [], 'chain_nb': [], + 'resseq': [], 'icode': [], 'res_nb': [], + 'aa': [], + 'pos_heavyatom': [], 'mask_heavyatom': [], + # 'pos_hydrogen': [], 'mask_hydrogen': [], + # 'bfactor_heavyatom': [], + }) + tensor_types = { + 'chain_nb': torch.LongTensor, + 'resseq': torch.LongTensor, + 'res_nb': torch.LongTensor, + 'aa': torch.LongTensor, + 'pos_heavyatom': torch.stack, + 'mask_heavyatom': torch.stack, + # 'bfactor_heavyatom': torch.stack, + # 'pos_hydrogen': torch.stack, + # 'mask_hydrogen': torch.stack, + } + + count_aa, count_unk = 0, 0 + + for i, chain in enumerate(chains): + seq_this = 0 # Renumbering residues + residues = Selection.unfold_entities(chain, 'R') + residues.sort(key=lambda res: (res.get_id()[1], 
res.get_id()[2])) # Sort residues by resseq-icode + for _, res in enumerate(residues): + resname = res.get_resname() + if not AA.is_aa(resname): continue + if not (res.has_id('CA') and res.has_id('C') and res.has_id('N')): continue + restype = AA(resname) + count_aa += 1 + if restype == AA.UNK: + count_unk += 1 + continue + + # Chain info + data.chain_id.append(chain.get_id()) + data.chain_nb.append(i) + + # Residue types + data.aa.append(restype) # Will be automatically cast to torch.long + + # Heavy atoms + pos_heavyatom, mask_heavyatom, bfactor_heavyatom = _get_residue_heavyatom_info(res) + data.pos_heavyatom.append(pos_heavyatom) + data.mask_heavyatom.append(mask_heavyatom) + # data.bfactor_heavyatom.append(bfactor_heavyatom) + + # Hydrogen atoms + # pos_hydrogen, mask_hydrogen = _get_residue_hydrogen_info(res) + # data.pos_hydrogen.append(pos_hydrogen) + # data.mask_hydrogen.append(mask_hydrogen) + + # Sequential number + resseq_this = int(res.get_id()[1]) + icode_this = res.get_id()[2] + if seq_this == 0: + seq_this = 1 + else: + d_CA_CA = torch.linalg.norm(data.pos_heavyatom[-2][BBHeavyAtom.CA] - data.pos_heavyatom[-1][BBHeavyAtom.CA], ord=2).item() + if d_CA_CA <= 4.0: + seq_this += 1 + else: + d_resseq = resseq_this - data.resseq[-1] + seq_this += max(2, d_resseq) + + data.resseq.append(resseq_this) + data.icode.append(icode_this) + data.res_nb.append(seq_this) + + if len(data.aa) == 0: + return None, None + + if (count_unk / count_aa) >= unknown_threshold: + return None, None + + seq_map = {} + for i, (chain_id, resseq, icode) in enumerate(zip(data.chain_id, data.resseq, data.icode)): + seq_map[(chain_id, resseq, icode)] = i + + for key, convert_fn in tensor_types.items(): + data[key] = convert_fn(data[key]) + + # # ignore UNKNOWN residues and nobackbone residues, true for used residue + # seq_mask = data['aa'] != AA.UNK + # bb_mask = data['mask_heavyatom'][:, BBHeavyAtom.CA] & data['mask_heavyatom'][:, BBHeavyAtom.C] & data['mask_heavyatom'][:, 
BBHeavyAtom.N] + # data['res_mask'] = seq_mask & bb_mask + + return data, seq_map + + +def get_fasta_from_pdb(pdb_file): + parser = PDBParser() + seq_dic = {} + structure = parser.get_structure("structure_name", pdb_file) + + for model in structure: + for chain in model: + sequence = "" + for residue in chain: + if AA.is_aa(residue.get_resname()): + if residue.get_resname() == 'UNK': + sequence += 'X' + else: + sequence += PDB.Polypeptide.three_to_one(non_standard_residue_substitutions[residue.get_resname()]) + seq_dic[chain.id] = sequence + + return seq_dic + +# def get_fasta_from_pdb(pdb_file): +# parser = PDBParser() +# structure = parser.get_structure("pdb", pdb_file) + +# fasta_sequence = "" +# for chain in structure.get_chains(): +# for residue in chain.get_residues(): +# if residue.get_resname() in seq1(''): +# fasta_sequence += seq1(residue.get_resname()) + +# return fasta_sequence + + diff --git a/pepflow/modules/protein/writers.py b/pepflow/modules/protein/writers.py new file mode 100644 index 0000000000000000000000000000000000000000..ae3b536e1b98414093626b529a79f51e6ad64d66 --- /dev/null +++ b/pepflow/modules/protein/writers.py @@ -0,0 +1,88 @@ +import torch +import warnings +from Bio import BiopythonWarning +from Bio.PDB import PDBIO +from Bio.PDB.StructureBuilder import StructureBuilder + +from pepflow.modules.protein.constants import AA, restype_to_allatom_names, restype_to_heavyatom_names + + +def save_pdb(data, path=None): + + # for k,v in data.items(): + # if isinstance(v, torch.Tensor): + # print(f'{k},{v.shape}') + # else: + # print(f'{k},{len(v)}') + + def _mask_select(v, mask): + if isinstance(v, str): + return ''.join([s for i, s in enumerate(v) if mask[i]]) + elif isinstance(v, list): + return [s for i, s in enumerate(v) if mask[i]] + elif isinstance(v, torch.Tensor): + return v[mask] + else: + return v + + def _build_chain(builder, aa_ch, pos_heavyatom_ch, mask_heavyatom_ch, chain_id_ch, resseq_ch, icode_ch): + 
builder.init_chain(chain_id_ch[0]) + builder.init_seg(' ') + + for aa_res, pos_allatom_res, mask_allatom_res, resseq_res, icode_res in \ + zip(aa_ch, pos_heavyatom_ch, mask_heavyatom_ch, resseq_ch, icode_ch): + restype = AA(aa_res.item()) + + # print(resseq_ch) + # tmp = [str(restype),' ',resseq_res.item(),icode_res] + # print(chain_id_ch[0]) + # print(tmp) + + + builder.init_residue( + resname = str(restype), + field = ' ', + resseq = resseq_res.item(), + icode = icode_res, + ) + + for i, atom_name in enumerate(restype_to_heavyatom_names[restype]): + if atom_name == '': continue # No expected atom + if (~mask_allatom_res[i]).any(): continue # Atom is missing + if len(atom_name) == 1: fullname = ' %s ' % atom_name + elif len(atom_name) == 2: fullname = ' %s ' % atom_name + elif len(atom_name) == 3: fullname = ' %s' % atom_name + else: fullname = atom_name # len == 4 + builder.init_atom(atom_name, pos_allatom_res[i].tolist(), 0.0, 1.0, ' ', fullname,) + + warnings.simplefilter('ignore', BiopythonWarning) + builder = StructureBuilder() + builder.init_structure(0) + builder.init_model(0) + + unique_chain_nb = data['chain_nb'].unique().tolist() + for ch_nb in unique_chain_nb: + mask = (data['chain_nb'] == ch_nb) + # print(mask) + aa = _mask_select(data['aa'], mask) + pos_heavyatom = _mask_select(data['pos_heavyatom'], mask) + mask_heavyatom = _mask_select(data['mask_heavyatom'], mask) + chain_id = _mask_select(data['chain_id'], mask) + resseq = _mask_select(data['resseq'], mask) + icode = _mask_select(data['icode'], mask) + + # print(aa.shape) + # print(pos_heavyatom.shape) + # print(mask_heavyatom.shape) + # print(chain_id) + # print(resseq.shape) + # print(icode) + + _build_chain(builder, aa, pos_heavyatom, mask_heavyatom, chain_id, resseq, icode) + + structure = builder.get_structure() + if path is not None: + io = PDBIO() + io.set_structure(structure) + io.save(path) + return structure diff --git a/pepflow/modules/so3/dist.py b/pepflow/modules/so3/dist.py new file 
mode 100644 index 0000000000000000000000000000000000000000..aef6a523ef664f4c7f4f01715cce9c4382d82924 --- /dev/null +++ b/pepflow/modules/so3/dist.py @@ -0,0 +1,919 @@ +from pepflow.modules.so3.utils import * + +import logging +import os +from typing import Callable, Dict, Optional, Tuple + +import numpy as np +import torch +import torch.nn as nn +from tqdm import tqdm + +from scipy.spatial.transform import Rotation + +logger = logging.getLogger(__name__) + +# Return angle of rotation. SO(3) to R^+ +def Omega(R): return torch.arccos((torch.diagonal(R, dim1=-2, dim2=-1).sum(axis=-1)-1)/2) + +# Power series expansion in the IGSO3 density. +def f_igso3(omega, t, L=500): + ls = torch.arange(L)[None] # of shape [1, L] + return ((2*ls + 1) * torch.exp(-ls*(ls+1)*t/2) * + torch.sin(omega[:, None]*(ls+1/2)) / torch.sin(omega[:, None]/2)).sum(dim=-1) + +# IGSO3(Rt; I_3, t), density with respect to the volume form on SO(3) +def igso3_density(Rt, t, L=500): return f_igso3(Omega(Rt), t, L) + +# Marginal density of rotation angle for uniform density on SO(3) +def angle_density_unif(omega): + return (1-torch.cos(omega))/np.pi + +# Normal sample in tangent space at R0 +def tangent_gaussian(R0): return torch.einsum('...ij,...jk->...ik', R0, hat(torch.randn(R0.shape[0], 3))) + +def centered_gaussian(num_batch, num_res, device='cpu'): + # torch.manual_seed(0) + noise = torch.randn(num_batch, num_res, 3, device=device) + return noise - torch.mean(noise, dim=-2, keepdims=True) + +def uniform_so3(num_batch, num_res, device='cpu'): + return torch.tensor( + Rotation.random(num_batch*num_res).as_matrix(), + device=device, + dtype=torch.float32, + ).reshape(num_batch, num_res, 3, 3) + +class SO3LookupCache: + def __init__( + self, + cache_dir: str, + cache_file: str, + overwrite: bool = False, + ) -> None: + """ + Auxiliary class for handling storage / loading of SO(3) lookup tables in npz format. + + Args: + cache_dir: Path to the cache directory. 
+ cache_file: Basic file name of the cache file. + overwrite: Whether existing cache files should be overwritten if requested. + """ + if not cache_file.endswith(".npz"): + raise ValueError("Filename should have '.npz' extension.") + self.cache_file = cache_file + self.cache_dir = cache_dir + self.cache_path = os.path.join(cache_dir, cache_file) + self.overwrite = overwrite + + @property + def path_exists(self) -> bool: + return os.path.exists(self.cache_path) + + @property + def dir_exists(self) -> bool: + return os.path.exists(self.cache_dir) + + def delete_cache(self) -> None: + """ + Delete the cache file. + """ + if self.path_exists: + os.remove(self.cache_path) + + def load_cache(self) -> Dict[str, torch.Tensor]: + """ + Load data from the cache file. + + Returns: + Dictionary of loaded data tensors. + """ + if self.path_exists: + # Load data and convert to torch tensors. + npz_data = np.load(self.cache_path) + torch_dict = {f: torch.from_numpy(npz_data[f]) for f in npz_data.files} + logger.info(f"Data loaded from {self.cache_path}") + return torch_dict + else: + raise ValueError(f"No cache data found at {self.cache_path}.") + + def save_cache(self, data: Dict[str, torch.Tensor]) -> None: + """ + Save a dictionary of tensors to the cache file. If overwrite is set to True, an existing + file is overwritten, otherwise a warning is raised and the file is not modified. + + Args: + data: Dictionary of tensors that should be saved to the cache. + """ + if not self.dir_exists: + os.makedirs(self.cache_dir) + + if self.path_exists: + if self.overwrite: + logger.info("Overwriting cache ...") + self.delete_cache() + else: + logger.warn( + f"Cache at {self.cache_path} exits and overwriting disabled. Doing nothing." + ) + else: + # Move everything to CPU and numpy and store. 
+ logger.info(f"Data saved to {self.cache_path}") + numpy_dict = {k: v.detach().cpu().numpy() for k, v in data.items()} + np.savez(self.cache_path, **numpy_dict) + + +class BaseSampleSO3(nn.Module): + so3_type: str = "base" # cache basename + + def __init__( + self, + num_omega: int, + sigma_grid: torch.Tensor, + omega_exponent: int = 3, + tol: float = 1e-7, + interpolate: bool = True, + cache_dir: Optional[str] = None, + overwrite_cache: bool = False, + device: str = 'cpu', + ) -> None: + """ + Base torch.nn module for sampling rotations from the IGSO(3) distribution. Samples are + created by uniformly sampling a rotation axis and using inverse transform sampling for + the angles. The latter uses the associated SO(3) cumulative probability distribution + function (CDF) and a uniform distribution [0,1] as described in [#leach2022_1]_. CDF values + are obtained by numerically integrating the probability distribution evaluated on a grid of + angles and noise levels and stored in a lookup table. Linear interpolation is used to + approximate continuous sampling of the function. Angles are discretized in an interval [0,pi] + and the grid can be squashed to have higher resolutions at low angles by taking different + powers. Since sampling relies on tabulated values of the CDF and indexing in the form of + `torch.bucketize`, gradients are not supported. + + Args: + num_omega (int): Number of discrete angles used for generating the lookup table. + sigma_grid (torch.Tensor): Grid of IGSO3 std devs. + omega_exponent (int, optional): Make the angle grid denser for smaller angles by taking + its power with the provided number. Defaults to 3. + tol (float, optional): Small value for numerical stability. Defaults to 1e-7. + interpolate (bool, optional): If enabled, perform linear interpolation of the angle CDF + to sample angles. Otherwise the closest tabulated point is returned. Defaults to True. + cache_dir: Path to an optional cache directory. 
If set to None, lookup tables are + computed on the fly. + overwrite_cache: If set to true, existing cache files are overwritten. Can be used for + updating stale caches. + + References + ---------- + .. [#leach2022_1] Leach, Schmon, Degiacomi, Willcocks: + Denoising diffusion probabilistic models on so (3) for rotational alignment. + ICLR 2022 Workshop on Geometrical and Topological Representation Learning. 2022. + """ + super().__init__() + self.num_omega = num_omega + self.omega_exponent = omega_exponent + self.tol = tol + self.interpolate = interpolate + self.device = device + self.register_buffer("sigma_grid", sigma_grid, persistent=False) + + # Generate / load lookups and store in non-persistent buffers. + omega_grid, cdf_igso3 = self._setup_lookup(sigma_grid, cache_dir, overwrite_cache) + self.register_buffer("omega_grid", omega_grid, persistent=False) + self.register_buffer("cdf_igso3", cdf_igso3, persistent=False) + + def _setup_lookup( + self, + sigma_grid: torch.Tensor, + cache_dir: Optional[str] = None, + overwrite_cache: bool = False, + ) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Master function for setting up the lookup tables. These can either be loaded from a npz + cache file or computed on the fly. Lookup tables will always be created and stored in double + precision. Casting to the target dtype is done at the end of the function. + + Args: + sigma_grid: Grid of sigma values used for computing the lookup tables. + cache_dir: Path to the cache directory. + overwrite_cache: If set to true, an existing cache is overwritten. Can be used for + updating stale caches. + + Returns: + Grid of angle values and SO(3) cumulative distribution function. + """ + if cache_dir is not None: + cache_name = self._get_cache_name() + cache = SO3LookupCache(cache_dir, cache_name, overwrite=True) + + # If cache dir is provided, check whether the necessary cache exists and whether it + # should be overwritten. 
+ if cache.path_exists and not overwrite_cache: + # Load data from cache. + cache_data = cache.load_cache() + omega_grid = cache_data["omega_grid"] + cdf_igso3 = cache_data["cdf_igso3"] + else: + # Store data in cache (overwrite if requested). + omega_grid, cdf_igso3 = self._generate_lookup(sigma_grid) + cache.save_cache({"omega_grid": omega_grid, "cdf_igso3": cdf_igso3}) + else: + # Otherwise just generate the tables. + omega_grid, cdf_igso3 = self._generate_lookup(sigma_grid) + + return omega_grid.to(sigma_grid.dtype), cdf_igso3.to(sigma_grid.dtype) + + def _get_cache_name(self) -> str: + """ + Auxiliary function for determining the cache file name based on the parameters (sigma, + omega, l, etc.) used for generating the lookup tables. + + Returns: + Base name of the cache file. + """ + cache_name = "cache_{:s}_s{:04.3f}-{:04.3f}-{:d}_o{:d}-{:d}.npz".format( + self.so3_type, + torch.min(self.sigma_grid).cpu().item(), + torch.max(self.sigma_grid).cpu().item(), + self.sigma_grid.shape[0], + self.num_omega, + self.omega_exponent, + ) + return cache_name + + def get_sigma_idx(self, sigma: torch.Tensor) -> torch.Tensor: + """ + Convert continuous sigmas to the indices of the closest tabulated values. + + Args: + sigma (torch.Tensor): IGSO3 std devs. + + Returns: + torch.Tensor: Index tensor mapping the provided sigma values to the internal lookup + table. + """ + return torch.bucketize(sigma, self.sigma_grid) + + def expansion_function( + self, omega_grid: torch.Tensor, sigma_grid: torch.Tensor + ) -> torch.Tensor: + """ + Function for generating the angle probability distribution. Should return a 2D tensor with + values for the std dev at the first dimension (rows) and angles at the second + (columns). + + Args: + omega_grid (torch.Tensor): Grid of angle values. + sigma_grid (torch.Tensor): IGSO3 std devs. + + Returns: + torch.Tensor: Distribution for angles discretized on a 2D grid. 
+ """ + raise NotImplementedError + + @torch.no_grad() + def _generate_lookup(self, sigma_grid: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Generate the lookup table for sampling from the target SO(3) CDF. The table is 2D, with the + rows corresponding to different sigma values and the columns with angles computed on a grid. + Variance is scaled by a factor of 1/2 to account for the deceleration of time in the + diffusion process due to the choice of SO(3) basis and guarantee time-reversibility (see + appendix E.3 in [#yim2023_2]_). The returned tables are double precision and will be cast + to the target dtype in `_setup_lookup`. + + Args: + sigma_grid (torch.Tensor): Grid of IGSO3 std devs. + + Returns: + Tuple[torch.Tensor, torch.Tensor]: Tuple containing the grid used to compute the angles + and the associated lookup table. + + References + ---------- + .. [#yim2023_2] Yim, Trippe, De Bortoli, Mathieu, Doucet, Barzilay, Jaakkola: + SE(3) diffusion model with application to protein backbone generation. + arXiv preprint arXiv:2302.02277. 2023. + """ + + current_device = sigma_grid.device + sigma_grid_tmp = sigma_grid.to(torch.float64) + + # If cuda is available, initialize everything on GPU. + # Even if Pytorch Lightning usually handles GPU allocation after initialization, this is + # required to initialize the module in GPU reducing the initialization time by orders of magnitude. + if torch.cuda.is_available(): + sigma_grid_tmp = sigma_grid_tmp.to(device=self.device) + + # Set up grid for angle resolution. Convert to double precision for better handling of numerics. + omega_grid = torch.linspace(0.0, 1, self.num_omega + 1).to(sigma_grid_tmp) + + # If requested, increase sample density for lower values + omega_grid = omega_grid**self.omega_exponent + + omega_grid = omega_grid * np.pi + + # Compute the expansion for all omegas and sigmas. + pdf_igso3 = self.expansion_function(omega_grid, sigma_grid_tmp) + + # Apply the pre-factor from USO(3). 
+ pdf_igso3 = pdf_igso3 * (1.0 - torch.cos(omega_grid)) / np.pi + + # Compute the cumulative probability distribution. + cdf_igso3 = integrate_trapezoid_cumulative(pdf_igso3, omega_grid) + # Normalize integral area to 1. + cdf_igso3 = cdf_igso3 / cdf_igso3[:, -1][:, None] + + # Move back to original device. + cdf_igso3 = cdf_igso3.to(device=current_device) + omega_grid = omega_grid.to(device=current_device) + + return omega_grid[1:].to(sigma_grid.dtype), cdf_igso3.to(sigma_grid.dtype) + + def sample(self, sigma: torch.Tensor, num_samples: int) -> torch.Tensor: + """ + Generate samples from the target SO(3) distribution by sampling a rotation axis angle, + which are then combined into a rotation vector and transformed into the corresponding + rotation matrix via an exponential map. + + Args: + sigma_indices (torch.Tensor): Indices of the IGSO3 std devs for which to take samples. + num_samples (int): Number of angle samples to take for each std dev + + Returns: + torch.Tensor: Sampled rotations in matrix representation with dimensions + [num_sigma x num_samples x 3 x 3]. + """ + # torch.manual_seed(0) + + vectors = self.sample_vector(sigma.shape[0], num_samples) + angles = self.sample_angle(sigma, num_samples) + + # Do postprocessing on angles. + angles = self._process_angles(sigma, angles) + + rotation_vectors = vectors * angles[..., None] + + rotation_matrices = rotvec_to_rotmat(rotation_vectors, tol=self.tol) + return rotation_matrices + + def _process_angles(self, sigma: torch.Tensor, angles: torch.Tensor) -> torch.Tensor: + """ + Auxiliary function for performing additional processing steps on the sampled angles. One + example would be to ensure sampled angles are 0 for a std dev of 0 for IGSO(3). + + Args: + sigma (torch.Tensor): Current values of sigma. + angles (torch.Tensor): Sampled angles. + + Returns: + torch.Tensor: Processed sampled angles. 
+ """ + return angles + + def sample_vector(self, num_sigma: int, num_samples: int) -> torch.Tensor: + """ + Uniformly sample rotation axis for constructing the overall rotation. + + Args: + num_sigma (int): Number of std devs to draw rotation axes for. + num_samples (int): Number of angle samples to take for each std dev. + + Returns: + torch.Tensor: Batch of rotation axes with dimensions [num_sigma x num_samples x 3]. + """ + vectors = torch.randn(num_sigma, num_samples, 3, device=self.sigma_grid.device) + vectors = vectors / torch.norm(vectors, dim=2, keepdim=True) + return vectors + + def sample_angle(self, sigma: torch.Tensor, num_samples: int) -> torch.Tensor: + """ + Create a series of samples from the IGSO(3) angle distribution. + + Args: + sigma (torch.Tensor): IGSO3 std devs for which to + take samples. + num_samples (int): Number of angle samples to take for each std dev. + + Returns: + torch.Tensor: Collected samples, will have the dimension [num_sigma x num_samples]. + """ + # Convert sigmas to respective indices for lookup table. + sigma_indices = self.get_sigma_idx(sigma) + # Get relevant sigma slices from stored CDFs. + cdf_tmp = self.cdf_igso3[sigma_indices, :] + + # Draw from uniform distribution. + p_uniform = torch.rand((*sigma_indices.shape, *[num_samples]), device=sigma_indices.device) + + # Determine indices for CDF. + idx_stop = torch.sum(cdf_tmp[..., None] < p_uniform[:, None, :], dim=1).long() + idx_start = torch.clamp(idx_stop - 1, min=0) + + if not self.interpolate: + omega = torch.gather(cdf_tmp, dim=1, index=idx_stop) + else: + # Get CDF values. + cdf_start = torch.gather(cdf_tmp, dim=1, index=idx_start) + cdf_stop = torch.gather(cdf_tmp, dim=1, index=idx_stop) + + # Compute weights for linear interpolation. + cdf_delta = torch.clamp(cdf_stop - cdf_start, min=self.tol) + cdf_weight = torch.clamp((p_uniform - cdf_start) / cdf_delta, min=0.0, max=1.0) + + # Get angle range for interpolation. 
+ omega_start = self.omega_grid[idx_start] + omega_stop = self.omega_grid[idx_stop] + + # Interpolate. + omega = torch.lerp(omega_start, omega_stop, cdf_weight) + + return omega + + +class SampleIGSO3(BaseSampleSO3): + so3_type = "igso3" # cache basename + + def __init__( + self, + num_omega: int, + sigma_grid: torch.Tensor, + omega_exponent: int = 3, + tol: float = 1e-7, + interpolate: bool = True, + l_max: int = 1000, + cache_dir: Optional[str] = None, + overwrite_cache: bool = False, + device: str = 'cpu', + ) -> None: + """ + Module for sampling rotations from the IGSO(3) distribution using the explicit series + expansion. Samples are created using inverse transform sampling based on the associated + cumulative probability distribution function (CDF) and a uniform distribution [0,1] as + described in [#leach2022_2]_. CDF values are obtained by numerically integrating the + probability distribution evaluated on a grid of angles and noise levels and stored in a + lookup table. Linear interpolation is used to approximate continuous sampling of the + function. Angles are discretized in an interval [0,pi] and the grid can be squashed to have + higher resolutions at low angles by taking different powers. + Since sampling relies on tabulated values of the CDF and indexing in the form of + `torch.bucketize`, gradients are not supported. + + Args: + num_omega (int): Number of discrete angles used for generating the lookup table. + sigma_grid (torch.Tensor): Grid of IGSO3 std devs. + omega_exponent (int, optional): Make the angle grid denser for smaller angles by taking + its power with the provided number. Defaults to 3. + tol (float, optional): Small value for numerical stability. Defaults to 1e-7. + interpolate (bool, optional): If enabled, perform linear interpolation of the angle CDF + to sample angles. Otherwise the closest tabulated point is returned. Defaults to True. + l_max (int, optional): Maximum number of terms used in the series expansion. 
+ cache_dir: Path to an optional cache directory. If set to None, lookup tables are + computed on the fly. + overwrite_cache: If set to true, existing cache files are overwritten. Can be used for + updating stale caches. + + References + ---------- + .. [#leach2022_2] Leach, Schmon, Degiacomi, Willcocks: + Denoising diffusion probabilistic models on so (3) for rotational alignment. + ICLR 2022 Workshop on Geometrical and Topological Representation Learning. 2022. + """ + self.l_max = l_max + super().__init__( + num_omega=num_omega, + sigma_grid=sigma_grid, + omega_exponent=omega_exponent, + tol=tol, + interpolate=interpolate, + cache_dir=cache_dir, + overwrite_cache=overwrite_cache, + device=device, + ) + + def _get_cache_name(self) -> str: + """ + Auxiliary function for determining the cache file name based on the parameters (sigma, + omega, l, etc.) used for generating the lookup tables. + + Returns: + Base name of the cache file. + """ + cache_name = "cache_{:s}_s{:04.3f}-{:04.3f}-{:d}_l{:d}_o{:d}-{:d}.npz".format( + self.so3_type, + torch.min(self.sigma_grid).cpu().item(), + torch.max(self.sigma_grid).cpu().item(), + self.sigma_grid.shape[0], + self.l_max, + self.num_omega, + self.omega_exponent, + ) + return cache_name + + def expansion_function( + self, + omega_grid: torch.Tensor, + sigma_grid: torch.Tensor, + ) -> torch.Tensor: + """ + Use the truncated expansion of the IGSO(3) probability function to generate the lookup table. + + Args: + omega_grid (torch.Tensor): Grid of angle values. + sigma_grid (torch.Tensor): Grid of IGSO3 std devs. + + Returns: + torch.Tensor: IGSO(3) distribution for angles discretized on a 2D grid. + """ + return generate_igso3_lookup_table(omega_grid, sigma_grid, l_max=self.l_max, tol=self.tol) + + def _process_angles(self, sigma: torch.Tensor, angles: torch.Tensor) -> torch.Tensor: + """ + Ensure sampled angles are 0 for small noise levels in IGSO(3). (Series expansion gives + uniform probability distribution.) 
+ + Args: + sigma (torch.Tensor): Current values of sigma. + angles (torch.Tensor): Sampled angles. + + Returns: + torch.Tensor: Processed sampled angles. + """ + angles = torch.where( + sigma[..., None] < self.tol, + torch.zeros_like(angles), + angles, + ) + return angles + + + +class SampleUSO3: + def sample(self, sigma: torch.Tensor, num_samples: int): + return torch.tensor(Rotation.random(num_samples).as_matrix(), dtype=torch.float32) + +# class SampleUSO3(BaseSampleSO3): +# so3_type = "uso3" # cache basename + +# def __init__( +# self, +# num_omega: int, +# sigma_grid: torch.Tensor, +# omega_exponent: int = 3, +# tol: float = 1e-7, +# interpolate: bool = True, +# cache_dir: Optional[str] = None, +# overwrite_cache: bool = False, +# ) -> None: +# """ +# Module for sampling rotations from the USO(3) distribution. Can be used to generate initial +# unbiased samples in the reverse process. Samples are created using inverse transform +# sampling based on the associated cumulative probability distribution function (CDF) and a +# uniform distribution [0,1] as described in [#leach2022_4]_. CDF values are obtained by +# numerically integrating the probability distribution evaluated on a grid of angles and noise +# levels and stored in a lookup table. Linear interpolation is used to approximate continuos +# sampling of the function. Angles are discretized in an interval [0,pi] and the grid can be +# squashed to have higher resolutions at low angles by taking different powers. +# Since sampling relies on tabulated values of the CDF and indexing in the form of +# `torch.bucketize`, gradients are not supported. + +# Args: +# num_omega (int): Number of discrete angles used for generating the lookup table. +# sigma_grid (torch.Tensor): Grid of IGSO3 std devs. +# omega_exponent (int, optional): Make the angle grid denser for smaller angles by taking +# its power with the provided number. Defaults to 3. +# tol (float, optional): Small value for numerical stability. 
Defaults to 1e-7. +# interpolate (bool, optional): If enables, perform linear interpolation of the angle CDF +# to sample angles. Otherwise the closest tabulated point is returned. Defaults to True. +# cache_dir: Path to an optional cache directory. If set to None, lookup tables are +# computed on the fly. +# overwrite_cache: If set to true, existing cache files are overwritten. Can be used for +# updating stale caches. + +# References +# ---------- +# .. [#leach2022_4] Leach, Schmon, Degiacomi, Willcocks: +# Denoising diffusion probabilistic models on so (3) for rotational alignment. +# ICLR 2022 Workshop on Geometrical and Topological Representation Learning. 2022. +# """ +# super().__init__( +# num_omega=num_omega, +# sigma_grid=sigma_grid, +# omega_exponent=omega_exponent, +# tol=tol, +# interpolate=interpolate, +# cache_dir=cache_dir, +# overwrite_cache=overwrite_cache, +# ) + +# def get_sigma_idx(self, sigma: torch.Tensor) -> torch.Tensor: +# return torch.zeros_like(sigma).long() + +# def sample_shape(self, num_sigma: int, num_samples: int) -> torch.Tensor: +# dummy_sigma = torch.zeros(num_sigma, device=self.sigma_grid.device) +# return self.sample(dummy_sigma, num_samples) + +# def expansion_function( +# self, +# omega_grid: torch.Tensor, +# sigma_grid: torch.Tensor, +# ) -> torch.Tensor: +# """ +# The probability density function of the uniform SO(3) distribution is the cosine scaling +# term (1-cos(omega))/pi which is applied automatically during sampling. This means, it is +# sufficient to return a tensor of ones to create the correct USO(3) lookup table. + +# Args: +# omega_grid (torch.Tensor): Grid of angle values. +# sigma_grid (torch.Tensor): Grid of IGSO3 std devs. + +# Returns: +# torch.Tensor: USO(3) distribution for angles discretized on a 2D grid. 
@torch.no_grad()
def integrate_trapezoid_cumulative(f_grid: torch.Tensor, x_grid: torch.Tensor) -> torch.Tensor:
    """
    Cumulatively integrate a discretized function with the trapezoid rule along the last axis.
    Mainly used to build the (unnormalized) cumulative distribution functions needed for
    sampling from the IGSO(3) distribution. Runs without gradient tracking.

    Args:
        f_grid (torch.Tensor): Function values on the grid; the last axis has N entries.
        x_grid (torch.Tensor): Grid positions; the last axis has N entries.

    Returns:
        torch.Tensor: Running integral with N - 1 entries along the last axis (not normalized).
        The grid spacing is given a leading singleton axis before the multiplication, so a
        1D `f_grid` with a 1D `x_grid` yields a result of shape [1, N - 1].
    """
    # Width of every interval; the extra leading axis lets it broadcast against a batch of f.
    interval_widths = torch.diff(x_grid, dim=-1).unsqueeze(0)
    # Sum of the function values at both ends of every interval.
    endpoint_sums = f_grid[..., 1:] + f_grid[..., :-1]
    trapezoid_areas = (endpoint_sums * interval_widths) / 2.0
    return torch.cumsum(trapezoid_areas, dim=-1)
def digso3_expansion(
    omega: torch.Tensor, sigma: torch.Tensor, l_grid: torch.Tensor, tol=1e-7
) -> torch.Tensor:
    r"""
    Derivative of the IGSO(3) angle distribution function with respect to the angle, evaluated
    for pairs of angles and std dev levels. Like `igso3_expansion`, the series is summed over
    a grid of expansion orders. The derivative is written out in closed form so that no second
    derivatives are needed during backpropagation.

    The angle-dependent part of every series term is differentiated as:

    .. math ::
        \frac{\partial}{\partial \omega} \frac{\sin((l+\tfrac{1}{2})\omega)}{\sin(\tfrac{1}{2}\omega)} = \frac{l\sin((l+1)\omega) - (l+1)\sin(l\omega)}{1 - \cos(\omega)}

    (quotient rule followed by trigonometric identities).

    Args:
        omega: Values of angles (1D tensor).
        sigma: Values of IGSO3 distribution std devs (1D tensor of same shape as `omega`).
        l_grid: Tensor containing expansion orders (0 to l_max).
        tol: Small offset for numerical stability.

    Returns:
        IGSO(3) angle distribution derivative (without pre-factor for uniform SO(3) distribution),
        one value per entry of `omega`.
    """
    # Shape everything as [num_angles, num_orders] for the series sum.
    orders = l_grid[None, :]
    orders_plus_one = (l_grid + 1.0)[None, :]
    angles = omega[:, None]

    # Angle-independent weight of every term: (2l + 1) * exp(-l (l + 1) sigma^2 / 2).
    weights = (2.0 * l_grid + 1.0)[None, :] * torch.exp(
        -orders * orders_plus_one * sigma[:, None] ** 2 / 2
    )

    # Angle-dependent numerator: l sin((l + 1) omega) - (l + 1) sin(l omega).
    numerator = orders * torch.sin(orders_plus_one * angles) - orders_plus_one * torch.sin(
        orders * angles
    )

    series = torch.sum(weights * numerator, dim=1)

    # 1 - cos(omega) is non-negative, so adding the offset keeps the denominator away from zero.
    df_igso = series / ((1.0 - torch.cos(omega)) + tol)

    # The derivative vanishes in the small-angle limit; any remaining inf / nan is zeroed too.
    invalid = (omega <= tol) | torch.isinf(df_igso) | torch.isnan(df_igso)
    return torch.where(invalid, torch.zeros_like(df_igso), df_igso)
@torch.no_grad()
def generate_lookup_table(
    base_function: Callable,
    omega_grid: torch.Tensor,
    sigma_grid: torch.Tensor,
    l_max: int = 1000,
    tol: float = 1e-7,
):
    """
    Tabulate a series-expansion function (an IGSO(3) expansion or one of its derivatives) on a
    grid of angles and std dev levels. The function is evaluated once per std dev level on the
    whole angle grid, with a progress bar over the levels. Runs without gradient tracking.

    Args:
        base_function: Function to tabulate. It is called as
            `base_function(omega, sigma, l_grid, tol=tol)` with 1D `omega` and `sigma` of equal
            length.
        omega_grid: Grid of angle values ranging from [0,pi] (shape is[num_omega]).
        sigma_grid: Grid of IGSO3 std dev values (shape is [num_sigma]).
        l_max: Largest expansion order; orders 0 to l_max (inclusive) are used.
        tol: Small value for numerical stability.

    Returns:
        Table of function values evaluated at different angles and std dev levels. The final shape is
        [num_sigma x num_omega]; device and dtype follow `omega_grid`.
    """
    # Expansion orders 0..l_max on the same device / dtype as the angle grid.
    expansion_orders = torch.arange(l_max + 1, device=omega_grid.device).to(omega_grid.dtype)

    num_sigma = len(sigma_grid)
    table = torch.zeros(
        num_sigma, len(omega_grid), device=omega_grid.device, dtype=omega_grid.dtype
    )

    progress = tqdm(range(num_sigma), desc=f"Computing {base_function.__name__}")
    for row in progress:
        # Repeat the current std dev so that it pairs up with every angle of the grid.
        sigma_row = torch.ones_like(omega_grid) * sigma_grid[row]
        table[row, :] = base_function(omega_grid, sigma_row, expansion_orders, tol=tol)

    return table
def check_rot_mat(R):
    """
    Check whether every matrix in a batch is a proper rotation matrix.

    A matrix passes if it is orthogonal (R R^T equals the identity) and has determinant +1,
    both within an absolute tolerance of 1e-4. Orthogonality is tested via the product with the
    transpose instead of an explicit matrix inverse, so singular inputs are reported as
    "not a rotation" (False) rather than raising a linear-algebra error.

    Args:
        R (torch.Tensor): Batch of matrices with shape (..., 3, 3), e.g. (N, 3, 3).

    Returns:
        bool: True if all matrices in the batch are rotation matrices, False otherwise.
    """
    ROT_MAT_TOL = 1e-4

    # Orthogonality: R R^T == I.
    identity = torch.eye(3, dtype=R.dtype, device=R.device).expand_as(R)
    is_orthogonal = torch.allclose(
        R @ R.transpose(-2, -1), identity, atol=ROT_MAT_TOL, rtol=0
    )

    # Proper rotation (no reflection): det(R) == +1.
    determinants = torch.det(R)
    has_unit_det = torch.allclose(
        determinants, torch.ones_like(determinants), atol=ROT_MAT_TOL, rtol=0
    )

    return is_orthogonal and has_unit_det
def skew_matrix_exponential_map(
    angles: torch.Tensor, skew_matrices: torch.Tensor, tol=1e-7
) -> torch.Tensor:
    r"""
    Compute the matrix exponential of a rotation vector in skew matrix representation. Maps the
    rotation from the Lie algebra so(3) to the rotation matrix representation. Uses the following
    form of Rodrigues' formula instead of `torch.linalg.matrix_exp` for better computational
    performance (in this case the skew matrix already contains the angle factor):

    .. math ::

        \exp(\mathbf{K}) = \mathbf{I} + \frac{\sin(\theta)}{\theta} \mathbf{K} + \frac{1-\cos(\theta)}{\theta^2} \mathbf{K}^2

    This form has the advantage that Taylor expansions can be used for small angles (instead of
    having to compute the unit length axis by dividing the rotation vector by small angles):

    .. math ::

        \frac{\sin(\theta)}{\theta} \approx 1 - \frac{\theta^2}{6}

        \frac{1-\cos(\theta)}{\theta^2} \approx \frac{1}{2} - \frac{\theta^2}{24}

    Args:
        angles (torch.Tensor): Rotation angles with shape [...].
        skew_matrices (torch.Tensor): Rotation vectors in skew matrix (so(3)) basis with shape
            [..., 3, 3]. A single unbatched matrix is supported as well.
        tol: Angles with absolute value below this threshold use the Taylor expansion.

    Returns:
        torch.Tensor: Corresponding rotation matrices with shape [..., 3, 3].
    """
    # A plain 3x3 identity broadcasts against any batch shape in the sum below.
    identity = torch.eye(3, device=skew_matrices.device, dtype=skew_matrices.dtype)

    # Broadcast angles over the matrix dimensions and pre-compute the square.
    angles = angles[..., None, None]
    angles_sq = angles.square()

    small_angle = torch.abs(angles) < tol

    # Replace (near-)zero angles by a harmless value *before* dividing. `torch.where` alone is
    # not enough: the unselected 0/0 branch would still inject NaNs into the gradients.
    safe_angles = torch.where(small_angle, torch.ones_like(angles), angles)

    # Standard coefficients away from zero, second order Taylor expansions close to zero.
    sin_coeff = torch.where(
        small_angle,
        1.0 - angles_sq / 6.0,
        torch.sin(safe_angles) / safe_angles,
    )
    cos_coeff = torch.where(
        small_angle,
        0.5 - angles_sq / 24.0,
        (1.0 - torch.cos(safe_angles)) / safe_angles.square(),
    )

    # Rodrigues' formula.
    skew_squared = torch.matmul(skew_matrices, skew_matrices)
    return identity + sin_coeff * skew_matrices + cos_coeff * skew_squared
+ + To improve numerical stability for case 1), the angle term at small or zero angles is + approximated by its truncated Taylor expansion: + + .. math :: + + \left[\log(\mathbf{R})\right]^\lor \approx \frac{1}{2} (1 + \frac{\theta^2}{6}) \left[\mathbf{R} - \mathbf{R}^\top\right]^\lor + + For angles close or equal to pi (case 2), the outer product relation can be used to obtain the + squared rotation vector: + + .. math :: \omega \otimes \omega = \frac{1}{2}(\mathbf{I} + R) + + Taking the root of the diagonal elements recovers the normalized rotation vector up to the signs + of the component. The latter can be obtained from the off-diagonal elements. + + Adapted from https://github.com/jasonkyuyim/se3_diffusion/blob/2cba9e09fdc58112126a0441493b42022c62bbea/data/so3_utils.py + which was adapted from https://github.com/geomstats/geomstats/blob/master/geomstats/geometry/special_orthogonal.py + with heavy help from https://cvg.cit.tum.de/_media/members/demmeln/nurlanov2021so3log.pdf + + Args: + rotation_matrices (torch.Tensor): Input batch of rotation matrices. + + Returns: + torch.Tensor: Batch of rotation vectors. + """ + # Get angles and sin/cos from rotation matrix. + angles, angles_sin, _ = angle_from_rotmat(rotation_matrices) + # Compute skew matrix representation and extract so(3) vector components. + vector = skew_matrix_to_vector(rotation_matrices - rotation_matrices.transpose(-2, -1)) + + # Three main cases for angle theta, which are captured + # 1) Angle is 0 or close to zero -> use Taylor series for small values / return 0 vector. + mask_zero = torch.isclose(angles, torch.zeros_like(angles)).to(angles.dtype) + # 2) Angle is close to pi -> use outer product relation. + mask_pi = torch.isclose(angles, torch.full_like(angles, np.pi), atol=1e-2).to(angles.dtype) + # 3) Angle is unproblematic -> use the standard formula. + mask_else = (1 - mask_zero) * (1 - mask_pi) + + # Compute case dependent pre-factor (1/2 for angle close to 0, angle otherwise). 
def angle_from_rotmat(
    rotation_matrices: torch.Tensor,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """
    Extract the rotation angle, its sine and its cosine from rotation matrices. The angle is
    obtained with atan2 from sine and cosine, which is numerically more stable for small angles
    than an inverse cosine of the trace alone.

    Args:
        rotation_matrices (torch.Tensor): Batch of rotation matrices with shape [..., 3, 3].

    Returns:
        Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: Angles, sines of the angles and cosines
        of the angles, each with shape [...].
    """
    # R - R^T is skew symmetric; its vector components [(2,1), (0,2), (1,0)] form a vector of
    # length 2 * sin(theta).
    antisymmetric = rotation_matrices - rotation_matrices.transpose(-2, -1)
    unnormalized_axis = torch.stack(
        (antisymmetric[..., 2, 1], antisymmetric[..., 0, 2], antisymmetric[..., 1, 0]),
        dim=-1,
    )
    sin_theta = torch.norm(unnormalized_axis, dim=-1) / 2.0

    # cos(theta) = (Tr[R] - 1) / 2
    cos_theta = (torch.einsum("...ii", rotation_matrices) - 1.0) / 2.0

    theta = torch.atan2(sin_theta, cos_theta)
    return theta, sin_theta, cos_theta
def _rotquat_to_axis_angle(
    rotation_quaternions: torch.Tensor, tol: float = 1e-7
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Auxiliary routine for computing rotation angle and rotation axis from unit quaternions.
    Works for a single quaternion as well as for arbitrary leading batch dimensions.

    The axis is normalized by dividing the vector part by `norm + tol`. For (near-)identity
    rotations the vector part is (close to) zero, so the returned axis is (close to) the zero
    vector instead of being undefined.

    Args:
        rotation_quaternions (torch.Tensor): Rotation quaternions in [r, i, j, k] format with
            shape [..., 4].
        tol (float, optional): Offset added to the norm to avoid division by zero.
            Defaults to 1e-7.

    Returns:
        Tuple[torch.Tensor, torch.Tensor]: Rotation angles with shape [...] and rotation axes
        with shape [..., 3].
    """
    # The vector (imaginary) part points along the rotation axis and has length sin(angle / 2).
    vector_part = rotation_quaternions[..., 1:]
    vector_norms = torch.norm(vector_part, dim=-1)

    # The real part is cos(angle / 2); atan2 recovers the half angle robustly.
    rotation_angles = 2.0 * torch.atan2(vector_norms, rotation_quaternions[..., 0])

    # Safe division. The trailing axis is added with an ellipsis so that the norms line up with
    # the last dimension for any number of leading batch dimensions.
    rotation_axes = vector_part / (vector_norms[..., None] + tol)
    return rotation_angles, rotation_axes
def apply_rotvec_to_rotmat(
    rotation_matrices: torch.Tensor,
    rotation_vectors: torch.Tensor,
    tol: float = 1e-7,
) -> torch.Tensor:
    """
    Compose rotation matrices with the rotations described by rotation vectors. The rotation
    vector is converted to a matrix and multiplied from the right, i.e. the result is
    `rotation_matrices @ rotvec_to_rotmat(rotation_vectors)`.

    Args:
        rotation_matrices: Input batch of rotation matrices.
        rotation_vectors: Input batch of rotation vectors.
        tol: Small offset for numerical stability (forwarded to `rotvec_to_rotmat`).

    Returns:
        Updated rotation matrices.
    """
    # Rotation encoded by the vector, in matrix form.
    increment = rotvec_to_rotmat(rotation_vectors, tol=tol)
    # Right-multiply the increment onto the input rotation.
    return torch.einsum("...ij,...jk->...ik", rotation_matrices, increment)
def geodesic_dist(mat_1: torch.Tensor, mat_2: torch.Tensor) -> torch.Tensor:
    """
    Distance between two rotation matrices, computed as the Frobenius norm of the matrix
    logarithm of the relative rotation `mat_1^T mat_2`.

    The logarithm is the skew matrix of the relative rotation vector, so the returned value is
    sqrt(2) times the length of that rotation vector.

    Args:
        mat_1 (torch.Tensor): First rotation matrix.
        mat_2 (torch.Tensor): Second rotation matrix.

    Returns:
        Scalar distance between mat_1 and mat_2 with the same leading (i.e. batch) dimensions.
    """
    # Rotation that takes mat_1 to mat_2, and its logarithm in so(3).
    relative_rotation = rot_mult(rot_transpose(mat_1), mat_2)
    log_relative = rotmat_to_skew_matrix(relative_rotation)

    # Squared Frobenius norm: Tr[A A^T].
    squared_norm = multidim_trace(rot_mult(log_relative, rot_transpose(log_relative)))
    return torch.sqrt(squared_norm)
def acos_linear_extrapolation(
    x: torch.Tensor,
    bounds: Tuple[float, float] = (-DEFAULT_ACOS_BOUND, DEFAULT_ACOS_BOUND),
) -> torch.Tensor:
    """
    `arccos(x)` with a linear continuation outside of `bounds`. Inside the bounds the regular
    `arccos` is returned; at and beyond each bound the function is replaced by its first order
    Taylor expansion around that bound. This keeps values and gradients finite when `x` is not
    guaranteed to lie strictly within `(-1, 1)`.

    More specifically::

        bounds=(lower_bound, upper_bound)
        if lower_bound < x < upper_bound:
            acos_linear_extrapolation(x) = acos(x)
        elif x <= lower_bound:
            acos_linear_extrapolation(x)
                = acos(lower_bound) + dacos/dx(lower_bound) * (x - lower_bound)
        else:  # x >= upper_bound
            acos_linear_extrapolation(x)
                = acos(upper_bound) + dacos/dx(upper_bound) * (x - upper_bound)

    Args:
        x: Input `Tensor`.
        bounds: A float 2-tuple `(lower_bound, upper_bound)` delimiting the region in which the
            exact `acos` is used. Both values have to lie strictly within (-1, 1) and
            `lower_bound` must not exceed `upper_bound`.

    Returns:
        acos_linear_extrapolation: `Tensor` containing the extrapolated `arccos(x)`.

    Raises:
        ValueError: If the bounds are in the wrong order or not strictly within (-1, 1).
    """
    lower_bound, upper_bound = bounds

    if lower_bound > upper_bound:
        raise ValueError("lower bound has to be smaller or equal to upper bound.")

    if lower_bound <= -1.0 or upper_bound >= 1.0:
        raise ValueError("Both lower bound and upper bound have to be within (-1, 1).")

    # Partition the inputs into the three regions.
    above = x >= upper_bound
    below = x <= lower_bound
    inside = ~(above | below)

    result = torch.empty_like(x)

    # Exact acos for lower_bound < x < upper_bound.
    result[inside] = torch.acos(x[inside])

    # Linear extrapolation for x >= upper_bound, then for x <= lower_bound.
    for region, anchor in ((above, upper_bound), (below, lower_bound)):
        result[region] = _acos_linear_approximation(x[region], anchor)

    return result
def so3_rotation_angle(
    R: torch.Tensor,
    eps: float = 1e-4,
    cos_angle: bool = False,
    cos_bound: float = 1e-4,
) -> torch.Tensor:
    """
    Compute the rotation angle (in radians) of every matrix in a batch via
    `angle = acos(0.5 * (Trace(R) - 1))`.

    The trace of each matrix is required to lie in `[-1 - eps, 3 + eps]`;
    `eps` absorbs small errors caused by limited machine precision.

    Args:
        R: Batch of rotation matrices of shape `(minibatch, 3, 3)`.
        eps: Tolerance for the valid trace check.
        cos_angle: If `True`, return the cosine of the rotation angle and
            skip the `acos` call entirely.
        cos_bound: If `> 0`, the angle is computed with
            `acos_linear_extrapolation` using the bounds
            `(-(1 - cos_bound), 1 - cos_bound)`, which keeps outputs and
            gradients finite for angles close to 0 or pi. Otherwise plain
            `torch.acos` is used.

    Returns:
        Rotation angles of shape `(minibatch,)`, or their cosines when
        `cos_angle` is `True`.

    Raises:
        ValueError: if `R` is not a batch of 3x3 matrices (the shape is
            unpacked into exactly three values).
        ValueError: if a matrix has a trace outside the valid range.
    """
    _, n_rows, n_cols = R.shape
    if (n_rows, n_cols) != (3, 3):
        raise ValueError("Input has to be a batch of 3x3 Tensors.")

    trace = R[:, 0, 0] + R[:, 1, 1] + R[:, 2, 2]

    too_small = trace < -1.0 - eps
    too_large = trace > 3.0 + eps
    if (too_small | too_large).any():
        raise ValueError("A matrix has trace outside valid range [-1-eps,3+eps].")

    # cosine of the rotation angle phi
    cos_phi = (trace - 1.0) * 0.5

    if cos_angle:
        return cos_phi

    # written as a negated '>' so that a NaN bound still selects torch.acos
    if not (cos_bound > 0.0):
        return torch.acos(cos_phi)

    bound = 1.0 - cos_bound
    return acos_linear_extrapolation(cos_phi, (-bound, bound))
def pairwise_geodesic_distance(x0, x1):
    """Compute the pairwise squared geodesic distance between x0 and x1 on SO(3).

    Every matrix of `x0` is paired with every matrix of `x1`; entry `(i, j)`
    of the result is `so3_relative_angle(x0[i], x1[j]) ** 2`.

    Parameters
    ----------
    x0 : Tensor, shape (bs0, 3, 3)
        represents the source minibatch
    x1 : Tensor, shape (bs1, 3, 3)
        represents the target minibatch

    Returns
    -------
    distances : Tensor, shape (bs0, bs1)
        represents the ground cost matrix between minibatches
    """
    # The two batches may differ in size, so each keeps its own count instead
    # of reusing x0's batch size for both.
    n0, n1 = x0.size(0), x1.size(0)
    x0 = rearrange(x0, 'b c d -> b (c d)', c=3, d=3)
    x1 = rearrange(x1, 'b c d -> b (c d)', c=3, d=3)
    # Row i * n1 + j of the two "mega batches" holds the pair (x0[i], x1[j]):
    # x0 repeats each element n1 times in a row, x1 is tiled n0 times.
    mega_batch_x0 = rearrange(x0.repeat_interleave(n1, dim=0), 'b (c d) -> b c d', c=3, d=3)
    mega_batch_x1 = rearrange(x1.repeat(n0, 1), 'b (c d) -> b c d', c=3, d=3)
    distances = so3_relative_angle(mega_batch_x0, mega_batch_x1) ** 2
    return distances.reshape(n0, n1)
def rotation_matrix_cosine_loss(R_pred, R_true):
    """
    Sum of cosine-embedding losses between matching columns of two batches
    of 3x3 matrices.

    Args:
        R_pred: (*, 3, 3).
        R_true: (*, 3, 3).
    Returns:
        Per-matrix losses, (*, ).
    """
    batch_dims = list(R_pred.shape[:-2])
    n_vectors = R_pred.numel() // 3

    # After the transpose every row of the flattened view is one column of
    # the original matrix.
    cols_pred = R_pred.transpose(-2, -1).reshape(n_vectors, 3)  # (n_vectors, 3)
    cols_true = R_true.transpose(-2, -1).reshape(n_vectors, 3)  # (n_vectors, 3)

    # target = 1 for every pair
    target = torch.ones([n_vectors, ], dtype=torch.long, device=R_pred.device)
    per_column = F.cosine_embedding_loss(cols_pred, cols_true, target, reduction='none')  # (n_vectors, )

    # regroup the three column losses of each matrix and add them up
    return per_column.reshape(batch_dims + [3]).sum(dim=-1)  # (*, )
@staticmethod + def _get_pad_mask(l, n): + return torch.cat([ + torch.ones([l], dtype=torch.bool), + torch.zeros([n-l], dtype=torch.bool) + ], dim=0) + + @staticmethod + def _get_common_keys(list_of_dict): + keys = set(list_of_dict[0].keys()) + for d in list_of_dict[1:]: + keys = keys.intersection(d.keys()) + return keys + + + def _get_pad_value(self, key): + if key not in self.pad_values: + return 0 + return self.pad_values[key] + + def __call__(self, data_list): + max_length = max([data[self.length_ref_key].size(0) for data in data_list]) + keys = self._get_common_keys(data_list) + + if self.eight: + max_length = math.ceil(max_length / 8) * 8 + data_list_padded = [] + for data in data_list: + data_padded = { + k: self._pad_last(v, max_length, value=self._get_pad_value(k)) if k not in self.no_padding else v + for k, v in data.items() + if k in keys + } + data_padded['res_mask'] = (self._get_pad_mask(data[self.length_ref_key].size(0), max_length)) + data_list_padded.append(data_padded) + return default_collate(data_list_padded) + + +def apply_patch_to_tensor(x_full, x_patch, patch_idx): + """ + Args: + x_full: (N, ...) + x_patch: (M, ...) + patch_idx: (M, ) + Returns: + (N, ...) 
def find_longest_true_segment(input_tensor):
    """
    Return a boolean mask that keeps only the longest run of truthy entries.

    The input is walked once as a Python list. When several runs share the
    maximum length the first one is kept, because a later run only replaces
    the current best if it is strictly longer.

    Args:
        input_tensor: tensor whose entries are interpreted as booleans.
    Returns:
        Bool tensor shaped like `input_tensor`; True inside the selected run,
        False everywhere else (all False if no entry is truthy).
    """
    flags = input_tensor.tolist()  # plain Python list for easy traversal

    best_start, best_length = 0, 0
    run_start, run_length = 0, 0

    for position, flag in enumerate(flags):
        if not flag:
            # the run is broken: the next candidate starts right after here
            run_length = 0
            run_start = position + 1
            continue
        run_length += 1
        if run_length > best_length:
            best_start, best_length = run_start, run_length

    # new tensor with the longest True segment set to True, the rest False
    result_tensor = torch.zeros_like(input_tensor, dtype=torch.bool)
    result_tensor[best_start:best_start + best_length] = True
    return result_tensor
def get_logger(name, log_dir=None, local_rank=0):
    """
    Return the logger called `name`, configured for DEBUG-level output.

    A stream handler is attached, plus a file handler writing to
    `<log_dir>/log_<local_rank>.txt` when `log_dir` is given. Both use the
    format `[time::name::level] message`.

    `logging.getLogger` hands back the same object for the same name, so
    handlers are only added when an equivalent one is not attached yet.
    Calling this function repeatedly therefore no longer duplicates every
    log line.

    Args:
        name: Name passed to `logging.getLogger`.
        log_dir: Directory for the log file, or None for console-only logging.
        local_rank: Integer used in the log file name.
    Returns:
        The configured `logging.Logger`.
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter('[%(asctime)s::%(name)s::%(levelname)s] %(message)s')

    # FileHandler subclasses StreamHandler, hence the exact type comparison.
    has_stream_handler = any(type(h) is logging.StreamHandler for h in logger.handlers)
    if not has_stream_handler:
        stream_handler = logging.StreamHandler()
        stream_handler.setLevel(logging.DEBUG)
        stream_handler.setFormatter(formatter)
        logger.addHandler(stream_handler)

    if log_dir is not None:
        # FileHandler stores its target as an absolute path in baseFilename.
        log_path = os.path.abspath(os.path.join(log_dir, 'log_%d.txt' % local_rank))
        has_file_handler = any(
            isinstance(h, logging.FileHandler) and h.baseFilename == log_path
            for h in logger.handlers
        )
        if not has_file_handler:
            file_handler = logging.FileHandler(log_path)
            file_handler.setLevel(logging.DEBUG)
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)

    return logger
def get_scheduler(cfg, optimizer):
    """
    Build the learning-rate scheduler selected by `cfg.type`.

    Supported values of `cfg.type` and the config fields they read:
        None        -> `BlackHole()` placeholder (no other field is read)
        'plateau'   -> ReduceLROnPlateau (`factor`, `patience`, `min_lr`)
        'multistep' -> MultiStepLR (`milestones`, `gamma`)
        'exp'       -> ExponentialLR (`gamma`)

    Args:
        cfg: Config object exposing `type` and the fields listed above.
        optimizer: Optimizer the scheduler is attached to.
    Returns:
        The scheduler instance, or a `BlackHole` when `cfg.type` is None.
    Raises:
        NotImplementedError: if `cfg.type` is not one of the supported values.
    """
    if cfg.type is None:
        return BlackHole()
    elif cfg.type == 'plateau':
        return torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            factor=cfg.factor,
            patience=cfg.patience,
            min_lr=cfg.min_lr,
        )
    elif cfg.type == 'multistep':
        return torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            milestones=cfg.milestones,
            gamma=cfg.gamma,
        )
    elif cfg.type == 'exp':
        return torch.optim.lr_scheduler.ExponentialLR(
            optimizer,
            gamma=cfg.gamma,
        )
    else:
        raise NotImplementedError('Scheduler not supported: %s' % cfg.type)
def recursive_to(obj, device):
    """
    Move every tensor found inside a nested container to `device`.

    Tensors are first moved with `.cuda(device=device, non_blocking=True)`;
    if that raises a RuntimeError the plain `.to(device)` is used instead.
    Lists, tuples and dicts are rebuilt with their elements / values
    processed recursively (tuples come back as plain `tuple`, dict keys are
    kept as they are). Any other object is returned unchanged.

    Args:
        obj: Tensor, list, tuple, dict, or any other object.
        device: Target device, forwarded to `.cuda` / `.to`.
    Returns:
        A structure mirroring `obj` with its tensors moved.
    """
    if isinstance(obj, torch.Tensor):
        try:
            moved = obj.cuda(device=device, non_blocking=True)
        except RuntimeError:
            # fall back to the generic transfer when the CUDA call is rejected
            moved = obj.to(device)
        return moved

    if isinstance(obj, list):
        return [recursive_to(item, device=device) for item in obj]

    if isinstance(obj, tuple):
        return tuple(recursive_to(item, device=device) for item in obj)

    if isinstance(obj, dict):
        return {key: recursive_to(value, device=device) for key, value in obj.items()}

    return obj
def has_changes(path='./'):
    """
    Report whether the git repository at `path` has pending changes.

    The candidates are the entries returned by `repo.index.diff(None)` plus
    the untracked files; anything whose path starts with 'configs/' is
    ignored. When candidates remain they are listed on stdout together with
    a reminder to commit.

    Args:
        path: Location handed to `Repo`.
    Returns:
        True if at least one relevant changed or untracked file exists,
        False otherwise.
    """
    repo = Repo(path)
    modified = [entry.a_path for entry in repo.index.diff(None)]
    candidates = modified + repo.untracked_files
    pending = [p for p in candidates if not p.startswith('configs/')]

    if len(pending) == 0:
        return False

    print('\n\nYou have uncommitted changes:')
    for fn in pending:
        print(' - %s' % fn)
    print('Please commit your changes before running the script.\n\n')
    return True
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import os\n", + "import pandas as pd\n", + "import numpy as np\n", + "import logging\n", + "import joblib\n", + "import pickle\n", + "import lmdb\n", + "from Bio import PDB\n", + "from Bio.PDB import PDBExceptions\n", + "from torch.utils.data import Dataset\n", + "from tqdm.auto import tqdm\n", + "\n", + "import torch\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "import random" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from Bio.PDB import PDBParser\n", + "from Bio.SeqUtils import seq1\n", + "\n", + "def get_fasta_from_pdb(pdb_file):\n", + " parser = PDBParser()\n", + " structure = parser.get_structure(\"pdb\", pdb_file)\n", + " \n", + " fasta_sequence = \"\"\n", + " for chain in structure.get_chains():\n", + " for residue in chain.get_residues():\n", + " fasta_sequence += seq1(residue.get_resname())\n", + " \n", + " return fasta_sequence" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'D'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "seq1('ASP')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "structure_dir = \"/datapool/data2/home/jiahan/Data/PepMerge_new/\"\n", + "seqs_dir = \"/datapool/data2/home/jiahan/ResProj/PepDiff/frame-flow/Data/seqs\"\n", + "bind_dic = torch.load(\"/datapool/data2/home/jiahan/ResProj/PepDiff/frame-flow/misc/affinity_dict.pt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10348\n", + "9464\n" + ] + } + ], + "source": [ + "all_pdbs = 
os.listdir(structure_dir)\n", + "print(len(all_pdbs))\n", + "all_pdbs = [x for x in all_pdbs if x in bind_dic]\n", + "print(len(all_pdbs))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'YDQTLFSIVEWARSSFRELKVDDQMKLLQNCWSLLPYNNLLIEMLHAK'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "get_fasta_from_pdb(os.path.join(structure_dir,all_pdbs[0],'pocket.pdb'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(os.path.join(seqs_dir,'seqs.fasta'),'w') as f:\n", + " for pdb in tqdm(all_pdbs):\n", + " fasta = get_fasta_from_pdb(os.path.join(structure_dir,pdb,'receptor.pdb'))\n", + " f.write(f'>{pdb}\\n')\n", + " f.write(fasta+'\\n')\n", + "# mmseqs easy-cluster seqs.fasta clusterRes tmp --min-seq-id 0.4 -c 0.8 --cov-mode 1" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
centeridcnts
7206u3n_C3kla_C657
5616u3n_C5ib3_C657
5636u3n_C5nmk_C657
5646u3n_C3mrm_P657
5656u3n_C6g9r_P657
............
86171g0y_I1g0y_I1
42093c01_A3c01_A1
41984tjx_B4tjx_B1
41846ybb_1_III_C6ybb_1_III_C1
94634b45_B4b45_B1
\n", + "

9464 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " center id cnts\n", + "720 6u3n_C 3kla_C 657\n", + "561 6u3n_C 5ib3_C 657\n", + "563 6u3n_C 5nmk_C 657\n", + "564 6u3n_C 3mrm_P 657\n", + "565 6u3n_C 6g9r_P 657\n", + "... ... ... ...\n", + "8617 1g0y_I 1g0y_I 1\n", + "4209 3c01_A 3c01_A 1\n", + "4198 4tjx_B 4tjx_B 1\n", + "4184 6ybb_1_III_C 6ybb_1_III_C 1\n", + "9463 4b45_B 4b45_B 1\n", + "\n", + "[9464 rows x 3 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tab = pd.read_csv('/datapool/data2/home/jiahan/ResProj/PepDiff/frame-flow/Data/seqs/clusterRes_cluster.tsv',sep='\\t',header=None)\n", + "tab.columns = ['center','id']\n", + "tab['cnts'] = tab.groupby('center')['id'].transform('count')\n", + "tab.sort_values('cnts',ascending=False,inplace=True)\n", + "tab" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "cnts = tab.drop_duplicates('center')" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2019" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cnts[cnts['cnts']<5]['cnts'].sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2019" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cnts[cnts['cnts']<5]['cnts'].sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "8365" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "10384-2019" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + 
"tab.to_csv('/datapool/data2/home/jiahan/ResProj/PepDiff/frame-flow/Data/seqs/center.csv',index=None)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1557" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(set(tab['center']))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1557" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(set(tab['center']))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "cnts = pd.DataFrame(tab['center'].value_counts())\n", + "cnts = cnts.drop_duplicates(subset='center')\n", + "cnts.to_csv('/datapool/data2/home/jiahan/ResProj/PepDiff/frame-flow/Data/seqs/center.csv',index=None)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlentranaarrottrans_lossrot_loss
01a0n_A148.9031440.2142862.3828320.0978631.207208
11a1m_C90.9207230.5555560.3704090.4192780.570159
21a1n_C80.6783000.8750000.4527850.2244580.172276
31a1o_C90.7135620.7777780.5731040.2118610.160051
41a1r_C161.5128500.8750001.0682790.4229040.476659
........................
94598s9i_1_III_B224.1038140.2727272.3415892.4048891.574362
94608siu_1_III_B1822.6935310.1111112.6877940.3711030.328657
94618sm5_5_III_J173.1526460.1176471.8614840.9645290.384649
94628t0p_1_III_C1812.8880190.0555562.7241810.5460450.609222
94639lpr_P30.2616061.0000000.2388680.0117890.270934
\n", + "

9464 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " id len tran aar rot trans_loss rot_loss\n", + "0 1a0n_A 14 8.903144 0.214286 2.382832 0.097863 1.207208\n", + "1 1a1m_C 9 0.920723 0.555556 0.370409 0.419278 0.570159\n", + "2 1a1n_C 8 0.678300 0.875000 0.452785 0.224458 0.172276\n", + "3 1a1o_C 9 0.713562 0.777778 0.573104 0.211861 0.160051\n", + "4 1a1r_C 16 1.512850 0.875000 1.068279 0.422904 0.476659\n", + "... ... ... ... ... ... ... ...\n", + "9459 8s9i_1_III_B 22 4.103814 0.272727 2.341589 2.404889 1.574362\n", + "9460 8siu_1_III_B 18 22.693531 0.111111 2.687794 0.371103 0.328657\n", + "9461 8sm5_5_III_J 17 3.152646 0.117647 1.861484 0.964529 0.384649\n", + "9462 8t0p_1_III_C 18 12.888019 0.055556 2.724181 0.546045 0.609222\n", + "9463 9lpr_P 3 0.261606 1.000000 0.238868 0.011789 0.270934\n", + "\n", + "[9464 rows x 7 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "samples = pd.read_csv(\"/datapool/data2/home/jiahan/Res Proj/PepDiff/frame-flow/misc/231220/sample_all.csv\")\n", + "samples" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "res = pd.merge(tab,samples,on='id')\n", + "# res[['center','id','cnts','len']].to_csv('/datapool/data2/home/jiahan/Res Proj/PepDiff/frame-flow/Data/seqs/meta_data.csv',index=False)\n", + "res.to_csv('/datapool/data2/home/jiahan/Res Proj/PepDiff/frame-flow/Data/seqs/meta_data.csv',index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
centeridcntslentranaarrottrans_lossrot_loss
06u3n_C3kla_C65790.9176540.6666670.6547390.0602500.475746
16u3n_C5ib3_C65790.6682580.4444440.3656150.1298410.102632
26u3n_C5nmk_C65790.7971590.8888890.2782660.1099270.236671
36u3n_C3mrm_P657100.7094780.6000000.1993720.1013380.088611
46u3n_C6g9r_P65790.8167820.8888890.2913100.2371740.124762
..............................
94591g0y_I1g0y_I12113.3383460.0476192.6008260.3869760.615135
94603c01_A3c01_A1209.3139180.3000002.0573580.1510060.600791
94614tjx_B4tjx_B130.4253271.0000000.5075350.0184091.391891
94626ybb_1_III_C6ybb_1_III_C1227.9483930.2727272.6007590.0836130.395743
94634b45_B4b45_B1132.9187690.3846151.6386140.5222430.599023
\n", + "

9464 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " center id cnts len tran aar rot \\\n", + "0 6u3n_C 3kla_C 657 9 0.917654 0.666667 0.654739 \n", + "1 6u3n_C 5ib3_C 657 9 0.668258 0.444444 0.365615 \n", + "2 6u3n_C 5nmk_C 657 9 0.797159 0.888889 0.278266 \n", + "3 6u3n_C 3mrm_P 657 10 0.709478 0.600000 0.199372 \n", + "4 6u3n_C 6g9r_P 657 9 0.816782 0.888889 0.291310 \n", + "... ... ... ... ... ... ... ... \n", + "9459 1g0y_I 1g0y_I 1 21 13.338346 0.047619 2.600826 \n", + "9460 3c01_A 3c01_A 1 20 9.313918 0.300000 2.057358 \n", + "9461 4tjx_B 4tjx_B 1 3 0.425327 1.000000 0.507535 \n", + "9462 6ybb_1_III_C 6ybb_1_III_C 1 22 7.948393 0.272727 2.600759 \n", + "9463 4b45_B 4b45_B 1 13 2.918769 0.384615 1.638614 \n", + "\n", + " trans_loss rot_loss \n", + "0 0.060250 0.475746 \n", + "1 0.129841 0.102632 \n", + "2 0.109927 0.236671 \n", + "3 0.101338 0.088611 \n", + "4 0.237174 0.124762 \n", + "... ... ... \n", + "9459 0.386976 0.615135 \n", + "9460 0.151006 0.600791 \n", + "9461 0.018409 1.391891 \n", + "9462 0.083613 0.395743 \n", + "9463 0.522243 0.599023 \n", + "\n", + "[9464 rows x 9 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res = pd.read_csv('/datapool/data2/home/jiahan/Res Proj/PepDiff/frame-flow/Data/seqs/meta_data.csv')\n", + "res" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "158" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "centers = set((res[(res['cnts']>=10)&(res['cnts']<=100)])['center'])\n", + "len(centers)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_1887023/2882254611.py:1: DeprecationWarning: Sampling from a set deprecated\n", + "since Python 3.9 and will be removed in a subsequent version.\n", + " tests = 
random.sample(centers, 10)\n" + ] + } + ], + "source": [ + "tests = random.sample(centers, 10)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
centeridcntslentranaarrottrans_lossrot_loss
39418dgo_1_III_C1r1s_B3660.6798430.8333330.4308530.3374730.250865
39438dgo_1_III_C8dgo_1_III_C3640.4253291.0000000.4827430.0908490.363330
39448dgo_1_III_C1jyr_L3690.9468990.8888890.4030770.1629880.471346
39458dgo_1_III_C3mxy_L3670.8977641.0000001.0402770.0932150.483877
39468dgo_1_III_C2h5k_C3630.3860131.0000000.4314080.2702940.130450
..............................
61942r02_B3c3o_B10133.7890580.2307692.4985160.3847860.289986
62012r02_B2r05_B10111.0306700.7272730.3032180.3523070.148563
62112r02_B5v3r_B10133.1387040.3846152.1097810.1978290.174575
62122r02_B2xs1_B10153.7137730.0666672.2640700.3781790.197648
62132r02_B2xs8_B1090.6757430.5555560.3382360.1673380.294312
\n", + "

187 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " center id cnts len tran aar rot \\\n", + "3941 8dgo_1_III_C 1r1s_B 36 6 0.679843 0.833333 0.430853 \n", + "3943 8dgo_1_III_C 8dgo_1_III_C 36 4 0.425329 1.000000 0.482743 \n", + "3944 8dgo_1_III_C 1jyr_L 36 9 0.946899 0.888889 0.403077 \n", + "3945 8dgo_1_III_C 3mxy_L 36 7 0.897764 1.000000 1.040277 \n", + "3946 8dgo_1_III_C 2h5k_C 36 3 0.386013 1.000000 0.431408 \n", + "... ... ... ... ... ... ... ... \n", + "6194 2r02_B 3c3o_B 10 13 3.789058 0.230769 2.498516 \n", + "6201 2r02_B 2r05_B 10 11 1.030670 0.727273 0.303218 \n", + "6211 2r02_B 5v3r_B 10 13 3.138704 0.384615 2.109781 \n", + "6212 2r02_B 2xs1_B 10 15 3.713773 0.066667 2.264070 \n", + "6213 2r02_B 2xs8_B 10 9 0.675743 0.555556 0.338236 \n", + "\n", + " trans_loss rot_loss \n", + "3941 0.337473 0.250865 \n", + "3943 0.090849 0.363330 \n", + "3944 0.162988 0.471346 \n", + "3945 0.093215 0.483877 \n", + "3946 0.270294 0.130450 \n", + "... ... ... \n", + "6194 0.384786 0.289986 \n", + "6201 0.352307 0.148563 \n", + "6211 0.197829 0.174575 \n", + "6212 0.378179 0.197648 \n", + "6213 0.167338 0.294312 \n", + "\n", + "[187 rows x 9 columns]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tmp = res[res['center'].isin(tests)]\n", + "tmp" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2.3520436403266887" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tmp['tran'].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"/datapool/data2/home/jiahan/Res Proj/PepDiff/frame-flow/Data/RF_samples/names.txt\",'w') as f:\n", + " for i,row in tmp.iterrows():\n", + " f.write(row['id']+'\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + 
"metadata": { + "kernelspec": { + "display_name": "foldy", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/playgrounds/gen_dataset.ipynb b/playgrounds/gen_dataset.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..34881c3f31383f2a0a7e0b6e8d8060266118cdc3 --- /dev/null +++ b/playgrounds/gen_dataset.ipynb @@ -0,0 +1,39289 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd\n", + "import numpy as np\n", + "from Bio import SeqIO, PDB\n", + "from Bio.PDB import PDBParser, PDBIO, Select, NeighborSearch, Structure, Model\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\")\n", + "import pickle\n", + "from tqdm import tqdm\n", + "import seaborn as sns\n", + "import numpy as np\n", + "import joblib\n", + "import shutil" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append(\"/datapool/data2/home/jiahan/Res Proj/PepDiff/PepFlow/\")\n", + "\n", + "from pepflow.modules.protein.parsers import *" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 0. 
fix ATOMs in biolip" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "lig_path = \"/datapool/data2/home/jiahan/Data/BioLiP/nonredund_lig/\"\n", + "os.makedirs(\"/datapool/data2/home/jiahan/Data/BioLiP/nonredund_lig_fix/\",exist_ok=True)\n", + "lig_path_fix = \"/datapool/data2/home/jiahan/Data/BioLiP/nonredund_lig_fix/\"\n", + "\n", + "for lig in os.listdir(lig_path):\n", + " with open(os.path.join(lig_path,lig),'r') as f:\n", + " content = f.read()\n", + " content = content.replace('HETATM','ATOM ')\n", + " with open(os.path.join(lig_path_fix,lig),'w') as f:\n", + " f.write(content)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# 1.pepbdb filtering" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "pepbdb_path = \"/datapool/data2/home/jiahan/Data/pepbdb/\"\n", + "data_path = \"/datapool/data2/home/jiahan/Data/pepbdb_split/\"" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(10.641843971631205, 9024)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dic = {'pdb':[],'pep':[],'pep_len':[],'res':[],'no':[],'pep_atom':[],'rec_atom':[],'type':[]}\n", + "\n", + "with open(\"/datapool/data2/home/jiahan/Data/pepbdb/peptidelist.txt\",'r') as f:\n", + " for line in f:\n", + " line = line.split()\n", + " if 'prot' not in line[-1]:\n", + " line += ['prot']\n", + " dic['pdb'].append(line[0].strip())\n", + " dic['pep'].append(line[1].strip())\n", + " dic['pep_len'].append(int(line[2].strip()))\n", + " dic['res'].append(float(line[-2].strip()))\n", + " dic['no'].append(int(line[-4].strip())+int(line[-3].strip()))\n", + " dic['pep_atom'].append(int(line[-8].strip()))\n", + " dic['rec_atom'].append(int(line[-6].strip()))\n", + " dic['type'].append(line[-1].strip())\n", + 
"\n", + "dic = pd.DataFrame(dic)\n", + "\n", + "dic['rec_len'] = dic['rec_atom'] / 8\n", + "\n", + "# dic = dic[(dic['res']<=4.0) & (dic['type']=='prot') & (dic['pep_len']<=25) & (dic['pep_len']>=4)]\n", + "# dic = dic[dic['rec_len'] >= 2*dic['pep_len']]\n", + "\n", + "dic = dic[(dic['res']<=4.0) & (dic['type']=='prot') & (dic['pep_len']<=25) & (dic['pep_len']>=3)]\n", + "dic = dic[dic['rec_len'] >= 2*dic['pep_len']]\n", + "\n", + "dic['pep_len'].mean(),len(dic)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument.\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAABIgAAAL3CAYAAADhiH7tAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAADV80lEQVR4nOzdeXhU5d3/8c+ZyWRfyQYhIewB2UEQF4oosrpV69by1LrbUq1ba9X2sW5V61P9qdXWqqAt2tpa64IrKiDKIoisIWEnIftCMtm3Ob8/YkZiAiSZSU4m835d11zXzJ1z5v7GVpN85r6/t2GapikAAAAAAAD4LZvVBQAAAAAAAMBaBEQAAAAAAAB+joAIAAAAAADAzxEQAQAAAAAA+DkCIgAAAAAAAD9HQAQAAAAAAODnCIgAAAAAAAD8XIDVBaD7uVwu5ebmKiIiQoZhWF0OAAAAAADoAaZpqqKiQklJSbLZjr9GiIDID+Tm5iolJcXqMgAAAAAAgAWys7OVnJx83GsIiPxARESEpOb/Q0RGRlpcDQAAAAAA6AlOp1MpKSnuXOB4CIj8QMu2ssjISAIiAAAAAAD8TEfazdCkGgAAAAAAwM8REAEAAAAAAPg5AiIAAAAAAAA/R0AEAAAAAADg5wiIAAAAAAAA/BwBEQAAAAAAgJ8jIAIAAAAAAPBzBEQAAAAAAAB+joAIAAAAAADAzwVYXQAAAAAAAIA/ME1TDQ0NcrlcHb7HZrPJ4XDIMIxurIyACAAAAAAAoFs1NTWpuLhYFRUVamho6PT9DodDERERiouLk91u74YKCYgAAAAAAAC6TVNTk7Kzs1VXV6eoqCiFh4fLbrd3aEWQaZpqampSZWWlysrKVFNTo5SUlG4JiQiIAAAAAAAAuklxcbHq6uo0aNAghYSEdOk9wsPDFRUVpaysLBUXFysxMdHLVdKkGgAAAAAAoFuYpqmKigpFRUV1ORxqERISosjISFVUVMg0TS9V+C0CIgAAAAAAgG7Q0NCghoYGhYeHe+X9IiIi3O/pbQREAAAAAAAA3aDltDJv9QxqeZ/OnILWUQREAAAAAAAA3chbR9R351H3BEQAAAAAAAB+joAIAAAAAADAzxEQAQAAAAAA+DkCIgAAAAAAAD9HQAQAAAAAAODnCIgAAAAAAAD8HAERAAAA
AACAnyMgAgAAAAAA8HMERAAAAAAAAN3INM1e9T7tISACAAAAAADoBjZbc+zS1NTklfdreZ+W9/UmAiIAAAAAAIBu4HA45HA4VFlZ6ZX3q6iocL+ntwV4/R0BAPBAVlaWiouLrS6jQ+Li4jRo0CCrywAAAEAvZRiGIiIiVFZWpqioKIWEhHT5vWpqauR0OhUdHS3DMLxYZTMCIgBAr5GVlaVRo0erprra6lI6JCQ0VBm7dhESAQAA4Jji4uJUU1OjrKwsRUZGKiIiQna7vUMhj2maampqUkVFhZxOp4KCghQXF9ctdRIQAQB6jeLiYtVUV+tHdz6mxEHDrC7nuAqy9umVR3+p4uJiAiIAAAAck91uV0pKioqLi1VRUaGysrJOv4fD4VB0dLTi4uJkt9u9X6QIiAAAvVDioGFKHjHG6jIAAAAAr7Db7UpMTFRCQoIaGhrkcrk6fK/NZpPD4eiWbWVHIyACAAAAAADoAYZhKDAw0Ooy2sUpZgAAAAAAAH6OgAgAAAAAAMDPERABAAAAAAD4OQIiAAAAAAAAP0dABAAAAAAA4OcIiAAAAAAAAPwcAREAAAAAAICfIyACAAAAAADwcwREAAAAAAAAfo6ACAAAAAAAwM8REAEAAAAAAPg5AiIAAAAAAAA/R0AEAAAAAADg5wiIAAAAAAAA/BwBEQAAAAAAgJ8jIAIAAAAAAPBzBEQAAAAAAAB+joAIAAAAAADAzxEQAQAAAAAA+DkCIgAAAAAAAD9HQAQAAAAAAODnCIgAAAAAAAD8HAERAAAAAACAnyMgAgAAAAAA8HMERAAAAAAAAH6OgAgAAAAAAMDPERABAAAAAAD4OQIiAAAAAAAAP0dABAAAAAAA4OcIiAAAAAAAAPwcAREAAAAAAICfIyACAAAAAADwcwREAAAAAAAAfo6ACAAAAAAAwM8FWF2AN1RXV+ull17Sm2++qV27dqmoqEjh4eFKTk7W3LlzddVVV2nUqFFemevIkSP68ssvtWHDBvejpKSk1TUzZ87UqlWrTvhehYWF+vjjj7V+/Xpt375d+/fvV0lJiWpraxUREaGUlBRNmzZNP/rRjzRr1iyv1A8AAAAAAPBdPh8QrVq1SldeeaWysrJajdfV1amkpERbt27VE088oTvvvFP333+/DMPwaL7Jkyfr4MGDHr2HJC1ZskTXXnutTNNs9+tlZWUqKyvT9u3b9eKLL+qcc87R3//+dyUmJno8NwAAAAAAwNF8eovZypUrtWDBgjbh0Hc1NDTowQcf1E033eTxnMcKdDrL6XR26r1WrFihs88+W1VVVV6ZHwAAAAAAoIXPBkROp1OLFi1STU1Nq/G0tDRdf/31mjdvnmy21t/eM888o7fffttrNQQEBGj06NFeea8hQ4bo8ssv13XXXaeZM2e2u9Jp586deuSRR7wyHwAAAAAAQAuf3WL2xz/+Ubm5ua3GZs+erffee08Oh0OStHTpUl199dWtrrntttt03nnndXmr2XnnnafU1FRNnz5dU6ZMUUFBgYYMGdKl97LZbLriiit02223afLkya2+9sUXX2j+/PmqqKhoNb5s2TI98MADXZoPAAAAAACgPT65gsg0Tb300kttxh955BF3OCRJV111lcaMGdPqmn379mn16tVdnvvpp5/WHXfcoTPOOEMhISFdfp9x48Zp06ZNWrZsWZtwSJJOP/103XfffW3GDx482CY0AgAAAAAA8IRPBkQ7duxo03coJiZGU6ZMaXPt7Nmz24wtX76822rrqLPPPluTJk067jUzZ85sd/y72+oAAAAAAAA84ZMB0ebNm9uMpaWltXttez2Cvv76a6/X1B2amprajIWEhCg+Pt6CagAAAAAAQF/lkz2I9u7d22bsWMe/JyQkdOj+3uitt95qMzZ37twT9k+qq6tTXV2d+7XT6fR6bQAAAAAAoO/wyRVE5eXlbcbCwsLavTY0NLRD9/c2u3bt0uOPP95m/NZbbz3hvQ8//LCioqLc
j5SUlO4oEQAAAAAA9BE+GRBVV1e3GQsIaH8x1NFNq1tUVlZ6vSZvOnTokObNm9em19Att9yi733veye8/6677lJ5ebn7kZ2d3V2lAgAAAACAPsAnt5i1tyqooaGh3WvbGw8PD/d6Td6Snp6uOXPmKCcnp9X4hRdeqP/7v//r0HsEBQUpKCioO8oDAAAAAAB9kE+uIIqKimozVlVV1e617Y23d39vsH79es2YMaNNOHTeeefptddek91ut6gyAAAAAADQl/lkQDR8+PA2YwUFBe1eW1hY2KH7rfbRRx9p9uzZKi0tbTX+wx/+UG+88YYCAwMtqgwAAAAAAPR1PhkQTZkypc1YZmZmu9fu2rWrzdjkyZO9XpMn/vWvf+m8885rs9rplltu0bJly47ZXwkAAAAAAMAbfDIgGjNmjFJTU1uNlZWVadOmTW2u/fjjj9uMLVy4sNtq66y//OUvuuKKK1RfX99q/OGHH9YTTzxxwiPtAQAAAAAAPOWTAZFhGLryyivbjN99992tmlIvXbpU6enpra4ZNmyYZs6c2WrszDPPlGEYrR4HDx7sltqP9uCDD+qnP/2pXC6XeywgIEBLly7Vr3/9626fHwAAAAAAQPLRU8wk6fbbb9cLL7yg3Nxc99iKFSs0btw4zZw5U9nZ2frwww/b3Pf44497tCrnT3/6k/bu3et+7XQ621yzd+9e3XLLLa3GfvjDH2ratGnu188//7x++9vftrl37Nix2rJlS5v7j/bzn/+8V/ZRAgAAAAAAvslnA6LIyEgtW7ZMCxcuVE1NjXs8MzPzmP2IFi9erPPPP9+jeV9//XWtXr36uNfk5OToySefbDU2ceLEVgHRd08qa7FlyxZt2bLluO9/4YUXEhABAAAAAACv8cktZi1mzZqld999VykpKce9zuFw6J577tHTTz/dQ5UBAAAAAAD4Dp9dQdRi1qxZysjI0NKlS/Xmm28qPT1dxcXFCg8PV3JysubMmaNrrrlGo0aNsrpUAAAAAACAXskwTdO0ugh0L6fTqaioKJWXlysyMtLqcgDgmDZv3qwpU6botmfeUPKIMVaXc1yH9+zU44sv0ldffaXJkydbXQ4AAADQRmfyAJ/eYgYAAAAAAADPERABAAAAAAD4OQIiAAAAAAAAP0dABAAAAAAA4OcIiAAAAAAAAPwcAREAAAAAAICfIyACAAAAAADwcwREAAAAAAAAfo6ACAAAAAAAwM8REAEAAAAAAPg5AiIAAAAAAAA/R0AEAAAAAADg5wiIAAAAAAAA/BwBEQAAAAAAgJ8jIAIAAAAAAPBzBEQAAAAAAAB+joAIAAAAAADAzxEQAQAAAAAA+DkCIgAAAAAAAD9HQAQAAAAAAODnCIgAAAAAAAD8HAERAAAAAACAnyMgAgAAAAAA8HMERAAAAAAAAH6OgAgAAAAAAMDPERABAAAAAAD4OQIiAAAAAAAAP0dABAAAAAAA4OcIiAAAAAAAAPwcAREAAAAAAICfIyACAPis2oYmFVbUqqahyepSAAAAAJ8WYHUBAAB0hMs0tSvPqewjNSqvblBZdb1qG12SJEPSgOhgDY0L19C4MMWEBVpbLAAAAOBjCIgAAL1evrNWKzMKVVhR1+ZrIQ67ahqalFtWq9yyWn2+t1jRoQ6dNjRWIxIjLKgWAAAA8D0ERACAXquuoUlf7CvR9pxySVJggE2TUqIVFx6kqBCHokMdcthtctY06EBxlfYXV+nwkWqVVTfovR35OqmkWjNHxiswgB3VAAAAwPEQEAEAeqUDxVVakV7g7i80qn+Ezhgep7Cgtj+6IkMcmpASrQkp0aprbNJXh45o48EjSs9zKqesRnPHJGpAVEhPfwsAAACAzyAgAgD0OoW1htZuy1OTaapfaKBmjYpXckxoh+4NCrDrtGFxSu0Xpg/T81Ve06B/f3VYpwzup2lD+skwjG6uHgAAAPA9rLkHAPQqgf1HaF1RgJpMU8MTwvXDUwZ1OBw62sCYEP1o
2iCNTAyXaUrrD5Rq9e4imabZDVUDAAAAvo2ACADQaxx2Nirhkt+p0TSU0i9Ec8ckym7r+oqfIIdd88cO0NmjEiRJWw+Xa/3+Um+VCwAAAPQZBEQAgF4ht6xG960ukT00SjGBLp07LkkBNu/8mBo7MEpnpsVLkr48WKrNh4545X0BAACAvoKACABguSNV9fqfFzeopMalhpJsnR7f6PWTxyYkR+u0YbGSpDV7i7Xjm5PRAAAAABAQAQB6gXve3K59RVWKC7Wp4LXfKsjePfNMHdxPU1JjJEmfZBRqd0FF90wEAAAA+BgCIgCApT7Yka/3tufLbjP069P7qamiuFvnO31YrMYOjJQkfbSzQEUVdd06HwAAAOALCIgAAJYpr2nQ/761Q5J0w/eGamiMo9vnNAxDs9ISNCQuTE2mqfd35KmhydXt8wIAAAC9GQERAMAyD7+3S4UVdRoaF6abzx7RY/PaDEPnnJSo8KAAHalu0KrMoh6bGwAAAOiNCIgAAJZYu7dY/9yYLUl65OLxCnZ0U+OhYwhx2DV3TKIMSel5TmXm048IAAAA/ouACADQ42rqm/TrN7ZLkv5neqqmDelnSR3JMaGa+s3cn2YUqqy63pI6AAAAAKsREAEAetwTH+9WVmm1BkQF61fz0iyt5ZTB/ZQUHaz6Jpfe35GvJpdpaT0AAACAFQiIAAA9KiPfqRfW7JckPfT9sYoI7v7G1MdjsxmaN6a/ggJsKqyo07r9JZbWAwAAAFiBgAgA0KP+34o9cpnS/LH9ddaoRKvLkSRFBDs0e3RzLZuzjqioos7iigAAAICeRUAEAOgxO3LK9cHOfBmGdNs5I60up5XhCeEanhAu02zuR2SabDUDAACA/yAgAgD0mP/38R5J0vkTkjQiMcLiatqaOSJeDruhfGetduQ6rS4HAAAA6DEERACAHrHtcJk+3lUgmyHdfPYIq8tpV3hwgE4dGitJ+mJvsarqGi2uCAAAAOgZBEQAgB7x+IrdkqQLJw3UsPhwi6s5tgnJ0YqPCFJdo0uf7y22uhwAAACgRxAQAQC63VeHjmhVZpHsNkM3n9U7Vw+1sNkMnTUqQZKUkV+h7NJqiysCAAAAuh8BEQCg2/2/j5tXD108eaAGx4VZXM2J9Y8M1vjkKEnSp5mFanS5LK4IAAAA6F4ERACAbvXlgVKt2VOsAJuhm3r56qGjnTYsVqGBdpVVN+jrrDKrywEAAAC6FQERAKBbtaweunRqilL6hVpcTccFBdg1Y3icJGnTwSOqrqdhNQAAAPouAiIAQLfZkVOutftKFGAztHjWcKvL6bS0/hFKiAhSfZNLXx4otbocAAAAoNsQEAEAus3Law9KkhaMG6CB0SHWFtMFhmHojG9WEW3PKdeR6nqLKwIAAAC6BwERAKBblFTW6a2tuZKkn5w+2NpiPJDSL1SDY0PlMqW1e0usLgcAAADoFgREAIBu8c+N2apvdGlCcpQmpURbXY5HTh8eJ0PS3qJK5ZbVWF0OAAAA4HUERAAAr2tocunv6w5Jkq48bbAMw7C4Is/EhQfppKRISdLne4tlmqbFFQEAAADeRUAEAPC6D3fmK99Zq7jwQC0cP8Dqcrxi+tBYBdgM5ZXXal9RldXlAAAAAF5FQAQA8LqXvjgoSfrhKakKCrBbW4yXhAcFaPKgGEnSF3uL5WIREQAAAPoQAiIAgFftyCnXpkNHFGAztOiUQVaX41VTUmMU4rCrrKZBh6r4EQoAAIC+g99uAQBe9dI3R9svHD9ACZHB1hbjZYEBNk0d3LyKKKPcLtkCLK4IAAAA8A4CIgCA1xRX1untLd8cbX/aYGuL6SbjBkYpLNCu6iZD4eNmW10OAAAA4BUERAAAr/nnl1mqb3JpQkq0Jn3Tr6evCbDbdPLgfpKkqNMuVUMTzYgAAADg+wiIAABe4XKZ+seX2ZKkK09Ntbia7jU2KVLBdlMBkQn6+EC11eUAAAAAHiMgAgB4xfr9Jcopq1FEcIAW
jOsbR9sfS4DdprTIJknSf3ZVqrahyeKKAAAAAM8QEAEAvOJfm5pXD50/IUnBjr5xtP3xDAl3qbGiWKU1Lr22MdvqcgAAAACPEBABADxWXtOg93fkS5IuPTnF4mp6ht2Qytf9S5L07Kq9rCICAACAT+N8XgCAx97Zmqu6RpfSEiM0PjnK6nJ6TOW2jzR04Y0qcNbpsf+s1cKRYVaXdExxcXEaNGiQ1WUAAACglyIgAgB47N9fHZYkXXJysgzDsLianuEsLZKaGrX/nWcVO+/nem7NAf3vomulpgarS2tXSGioMnbtIiQCAABAuwiIAAAeycyv0NbsMgXYDH1/0kCry+kxNZVOSdKM6SfrgN1UTUSsLnzo3xoa4bK4srYKsvbplUd/qeLiYgIiAAAAtIuACADgkX9/05z67NEJig0PsrianhefNEhxA+P12Z5i7a8N0RkTU2Wz+ccqKgAAAPQdNKkGAHRZQ5NL//06R5L/NKduz9iBUQpx2FVe06DdhRVWlwMAAAB0Wp8IiKqrq/Xss89qzpw5SklJUXBwsOLi4jRx4kTdeeedysjI8NpcR44c0Ycffqj7779fCxcuVFxcnAzDaPU488wzO/WepmnqjTfe0BVXXKGhQ4cqLCxMkZGRGjlypK655hp98sknXqsfALzp04xClVTVKz4iSDNHxltdjmUcdpsmpkRLkjYdOiLTNK0tCAAAAOgkn99itmrVKl155ZXKyspqNV5XV6eSkhJt3bpVTzzxhO68807df//9HjdPnTx5sg4ePOjRexwtOztbV1xxhb744os2X6uoqNCePXu0ZMkSXXDBBXr55ZcVFeU/pwMB6P1atpddPDlZAfY+8ZlDl41PjtKmQ6UqqazXgZIqDY0Lt7okAAAAoMN8+rf5lStXasGCBW3Coe9qaGjQgw8+qJtuusnjOb35qXBubq7OPPPMdsOh73rrrbc0d+5cVVVVeW1+APBEobNWKzOLJDWfXubvgh12jR8YLUnadJBVRAAAAPAtPhsQOZ1OLVq0SDU1Na3G09LSdP3112vevHmy2Vp/e88884zefvttr9UQEBCg0aNHd/n+G264Qfv372811q9fP/3kJz/RFVdcobCwsFZf27Bhg3772992eT4A8KY3t+SoyWVq8qBoDYtntYwkTRoULbvNUF55rXLKak58AwAAANBL+GxA9Mc//lG5ubmtxmbPnq3t27frueee0/vvv68XXnihzX233XabR5/qnnfeeXrssce0Zs0aOZ1Ovffee116n5UrV2r58uWtxgYMGKDt27dr6dKlevXVV7VhwwaFhoa2uuapp57SgQMHulw/AHjLm183/zf44imsHmoRFhSgkwZESmpeRQQAAAD4Cp8MiEzT1EsvvdRm/JFHHpHD4XC/vuqqqzRmzJhW1+zbt0+rV6/u8txPP/207rjjDp1xxhkKCQnp8vssWbKkzdjtt9+upKQk9+sxY8boqquuanVNU1OTXn755S7PCwDesKegQul5TjnshhaMHWB1Ob3KlNQYGZIOlVar0FlrdTkAAABAh/hkQLRjx442fYdiYmI0ZcqUNtfOnj27zdh3V+5Yob2VR+ecc06bsd5aPwD/9uaW5qPtZ46MV0xYoMXV9C5RIQ6NTIyQ1HyiGQAAAOALfDIg2rx5c5uxtLS0dq9tr0fQ119/7fWaOuPQoUMqLS1tM97e99Be/Tt27FBjY2O31AYAJ2Kapt7a0ry97IKJAy2upneakhojSdpbWKnymgaLqwEAAABOzCcDor1797YZS0xMbPfahISEDt3fk9qbPyoqSkFBQW3G26u/rq5O2dnZx3z/uro6OZ3OVg8A8JbNWUd0+EiNwgLtmj26/f/2+rv4iCAN6hcqU9KW7DKrywEAAABOyCcDovLy8jZj3z3xq8V3mzwf6/6e5Gn9x3qPFg8//LCioqLcj5SUlK4VCgDtaGlOPXdsf4UE2i2upveaPChakrQzt1y1DU3WFgMAAACcQI8E
RE6nU0VFRV57v+rq6jZjAQEB7V57dNPqFpWVlV6rpSs8rV86/vdw1113qby83P043mojAOiMhiaX3t2eJ4ntZScyqF+oYsMD1dBkakeOtR9MAAAAACficUCUm5vrfpSUlLT62uuvv67Ro0crJiZG/fv3V2xsrO655x41NHjWj6G9VTXHes/2xsPDwz2a31Oe1i8d/3sICgpSZGRkqwcAeMOaPUUqrapXXHigTh8Wa3U5vZphGJo8qLkX0ZbDZWpymRZXBAAAABybRwHR2rVrlZKS4n488MAD7q+99957uuyyy7R7926ZpinTNHXkyBE98sgjuuaaazwqOioqqs1YVVVVu9e2N97e/T3J0/qP9R4A0N1atpedOz5JAXaf3KXco9ISIxQWaFdVXZN2F1RYXQ4AAABwTB79dv/ZZ5+5wx9JuuSSS9xfu//++93jhmG4H6Zp6pVXXtG6deu6PO/w4cPbjBUUFLR7bWFhYYfu70ntze90OlVbW9tmvL36g4KC6CsEoMdV1TVqRXrzf2svnMT2so6w2wxNSImW1Nzcu+XnIgAAANDbeBQQHX1cfFhYmKZPny6pedvZl19+KcMwJKlViNTi1Vdf7fK8U6ZMaTOWmZnZ7rW7du1qMzZ58uQuz+0Nqampio1tuzUjIyOjzVh79Y8bN+6YPYsAoLusSC9QTUOTUmNDNSGZVYwdNW5glBx2Q8WV9co+UmN1OQAAAEC7PAqI9uzZI6l5hdC4ceNktzefZvPFF1+0uu5HP/qR5s6dK9M03aHRhg0bujzvmDFjlJqa2mqsrKxMmzZtanPtxx9/3GZs4cKFXZ7bWxYsWNBmrL1ae2v9APzPm1tyJDU3p275bzlOLNhh10kDmnvBbT50xOJqAAAAgPZ5FBAVFxe7/0gYMWKEezw9Pd39fNq0afr73/+ud999VyNHjpTUvKLowIEDXZ7XMAxdeeWVbcbvvvvuVk2dly5d2qoWSRo2bJhmzpzZauzMM89stQ3OMAwdPHiwy/V1xFVXXdVm7PHHH1dubq779c6dO/XSSy+1usZut7f7vQNAdyqprNOaPcWSpAsnJllcje+ZNChGhqRDpdUqrqyzuhwAAACgDY/2KRUXF7ufH31S1tHbvU477TRJks1m06mnnqrdu3dLkioqPGvWefvtt+uFF15oFaisWLFC48aN08yZM5Wdna0PP/ywzX2PP/64R598/+lPf9LevXvdr51OZ5tr9u7dq1tuuaXV2A9/+ENNmzbN/XrWrFk699xztXz5cvdYXl6exo0bp/PPP1/19fV66623VF1d3ep9br75Zg0ZMqTL9QNAV7y3I19NLlNjB0ZqaLy1J0H6oqgQh4YlhGtvYaW2ZJdp9uhEq0sCAAAAWvEoIHK5XO7nlZWV7udHB0RHryyKiIhwP/e0UWdkZKSWLVumhQsXqqbm254OmZmZx+xHtHjxYp1//vkezfv6669r9erVx70mJydHTz75ZKuxiRMntgqIJOm5557TjBkztH//fvdYaWlpm1VDLU455ZRWJ8UBQE9ZvrU5jD9/AquHumpSSrT2FlYqI79Cpw+LU0ig3eqSAAAAADePtphFR0dLag571q5dK6m5F9DOnTvd17RsK5Nah0hHh0VdNWvWLL377rsnPNHL4XDonnvu0dNPP+3xnN6UlJSklStXuldZHc/555+vDz/8UGFhYT1QGQB8q8BZqy8PlkqSFo4nIOqqAVHBSogIUpPL1I7ccqvLAQAAAFrxaAXR8OHD3cew79mzR6eddpoqKytVX1/vvmbChAnu51lZWZKaewgNHOidI5JnzZqljIwMLV26VG+++abS09NVXFys8PBwJScna86cObrmmms0atQor8znbYMGDdLnn3+u//73v3rttdf05ZdfqqCgQHa7Xf3799cZZ5yhRYsW6eyzz7a6VAB+6t1teTJNafKgaA2MDrG6HJ9lGIYmpkTro/QCbTtcrsmDYmS30ewbAAAAvYNHAdHMmTO1
du1aGYYh0zS1YcMG99YxwzA0ceLEVse5b9++3d3/x5uBTWhoqBYvXqzFixd36f5Vq1Z1y7UdZRiGLrroIl100UVef28A8NQ725q3l53H9jKPjUgM15o9xaqsa9S+okqNTPR8NS0AAADgDR5tMbv22mvlcDgkyR38HN0A+sYbb3Q/3759u4qKityvp06d6snUAIAekF1ara+zymQY0oJxA6wux+cF2GwalxwlSdqSXWZtMQAAAMBRPAqIhgwZoj//+c8KCAiQaZruhyRdeOGFuu6669zXvvbaa5K+bU591llneTI1AKAHvLs9T5J0ypB+SowMtriavmH8wCjZDCmvvFYFzlqrywEAAAAkebjFTJKuvvpqzZgxQ//617+UlZWliIgInX322Zo/f36r66ZNm6bnn39eUvMqo8mTJ3s6NQCgmy3/ZnvZuTSn9pqwoACNTIxQRn6FtmSXae6Y/laXBAAAAHgeEEnNR9nfc889x73G0+PlAQA960BxlXbkOGW3GZo/lhDDmyakRCsjv0K7Cyp0xvA4hQV55ccxAAAA0GUebTEDAPRdy7c2rx46bVisYsODLK6mb+kfGawBUcFymdL2HI68BwAAgPW8+pFlZWWlPv74Y61bt075+fmqrKzUueeeq6uuusqb0wAAesDybc39hzi9rHtMTIlWXnm+th0u18mDYxRg4zMbAAAAWMcrAZFpmnrkkUf02GOPqby89SehqampcrlcSk1NVW5u86fR55xzjj744ANvTA0A6Aa7CyqUWVAhh93Q3JPYXtYdhsWHKzwoQJV1jdpbWKlR/SOtLgkAAAB+zOOPK+vr6zV//nz95je/UVlZWZvTzCTJZrPp6quvdo9/8sknys/P93RqAEA3adleNnNkvKJCHRZX0zfZbYbGDmwOhbYdZpsZAAAArOVxQPSzn/1MH330kUzTlGEY7sd3XXzxxZKaTzBzuVx6//33PZ0aANANTNPUO99sL+P0su41NunbI++LKuqsLgcAAAB+zKOAaPPmzVqyZEmrUOjolUNHGz9+vBISEtyvV61a5cnUAIBusjPXqQPFVQoKsGn2SYlWl9OnhQUFaHh8uCRp6+Eya4sBAACAX/MoIFqyZIn7uWmaSkpK0kMPPeReTfRdEyZMcAdIO3fu9GRqAEA3eeeb7WVnj05QOMevd7vxydGSpMz8CtU1NFlbDAAAAPyWRwHRqlWrZBiGTNNUWFiY1q9fr7vuuuuY1ycnJ0tqDpOysrI8mRoA0A1cLvPb08vYXtYjkqKDFRsWqEaXqfQ8p9XlAAAAwE95FBDl5ORIau4rNG/ePA0cOPC414eEhLifO538EgwAvc3X2UeUU1ajsEC7Zo1KOPEN8JhhGBqfHCWpuVn1sbZqAwAAAN3Jo4CopqbG/TwuLu6E1xcWFrqfBwSwbQEAept3tjavHpozpr+CHXaLq/Efo/pHKtBuU1lNg7JKq60uBwAAAH7Io4CoX79+7ue7d+8+7rWmaWrTpk3u3kRH3wsAsF7T0dvLJgywuBr/Ehhg0+gBEZI48h4AAADW8CggGjlypEzTlGmaWr16tb788stjXvviiy/q4MGDkpqX05900kmeTA0A8LIN+0tUXFmn6FCHzhgeb3U5fqelWfWB4io5axusLQYAAAB+x6OAaObMmZKaAx+Xy6U5c+a0aVK9a9cu3XDDDfrpT3/qbmgtSWeeeaYnUwMAvOydbc2nl80f21+BAR79eEAX9AsLVHJMiExJO3JYRQQAAICe5dFfANdee627l5BhGHI6nfrDH/4gSe6VRR999JFeeOEFNTV9e3Svw+HQVVdd5cnUAAAvqm906f0d+ZI4vcxKLc2qd+Q41eSiWTUAAAB6jkcBUUpKim677Tb3qqCjVwi1aAmKWnoPGYahX/7yl0pMTPRkagCAF32xt1hl1Q2KCw/SKUNjrS7Hbw2LC1dYoF01DU3aV1RpdTkAAADwIx7vIXjooYd04YUXtgqJ2ntIzWHRhRde
qPvuu8/TaQEAXvTO1ubtZeeOHyC7zbC4Gv9lsxkak9S8img728wAAADQgzwOiOx2u9544w099thj6tevn3vF0Hcf0dHRevTRR/X666/LZqO3BQD0FrUNTfoovUASp5f1BmOSIiVJh4/U6Eh1vcXVAAAAwF8EeOuNbr/9dv3sZz/TqlWrtGHDBhUVFUmSEhISNHXqVJ155pkKDQ311nQAAC9ZlVmoyrpGDYwO0aSUGKvL8XuRIQ6lxobqUEm1duY4dcaIOKtLAgAAgB/wWkAkSSEhIZo/f77mz5/vzbcFAHSjd7bmSWreXmZje1mvMG5glA6VVCs9z6npw/opgJW3AAAA6Gb8xgkAfqyyrlGfZLRsL+P0st5iSGyYwoMCmptVF1ZZXQ4AAAD8AAERAPixT3YVqLbBpSFxYe7eN7CezWbopG/+99hBs2oAAAD0gA5tMTvrrLO8PrFhGPrkk0+8/r4AgI5rOb3svPED3CdOoncYmxSpjQdKdbisRkeq6hUTFmh1SQAAAOjDOhQQrVq1yqt/OJimyR8iAGCx8uoGrd7dfKAA28t6n4hghwbHhelAcZW255breyPirS4JAAAAfVintpgd6wj7zjwAAL3Dhzvz1dBkalT/CI1IjLC6HLRj7MDmbWa7cp1qbHJZXA0AAAD6sk4FRIZhePwAAPQO72z7ZnsZq4d6rcHfNKuubXRpb1Gl1eUAAACgD+twQNTRlUEd+ToAwFrFlXX6Ym+xpObj7dE72QxDY93Nqp0WVwMAAIC+rEM9iA4cOHDMrzmdTv3whz/Uzp07FRYWpp/97GeaN2+ekpOTJUmHDx/WBx98oD//+c+qqqrSoEGD9Prrrys+nl4KAGCV97fnyWVKE1KilRobZnU5OI6TkiK14UCpcspqdKS6XjGhNKsGAACA93UoIEpNTW133DRNzZo1Szt37lRwcLA+++wzTZo0qdU1I0aM0KxZs3TFFVfo1FNPVXZ2tm644QZt2LDB8+oBAF3yztY8Sc2nl6F3iwh2aFBsqA6VVCs916nTh8dZXRIAAAD6oE71IPquF198UZ999pkMw9CCBQvahENHmzhxohYuXCjTNLVlyxY988wznkwNAOiivPIafXmwVIYhnTue/kO+YGxSlCQpPc8pl4ut2gAAAPA+jwKil156yf08MTHxhNcnJCS4n7/66queTA0A6KJ3tzWvHpo6uJ/6RwVbXA06YkhcmEIcdlXXN+lgSZXV5QAAAKAP8igg2rVrlwzDkGmaWrt27QmvX7dunfv63bt3ezI1AKCL3tnK6WW+xm4zNHpAhCRpZy7NqgEAAOB9HgVEdXV17ufbtm3TL37xi1ZjR1936623auvWre6xxsZGT6YGAHTBoZIqbT1cLrvN0Pyx/a0uB50w5pttZgdKqlRVx89QAAAAeFeHmlQfy4gRI7Rt2zb3qqA//elPWrZsmWbMmNHqFLM1a9aorKxM0rfH3A8bNsyzygEAnfbWlubVQ6cNi1VceJDF1aAz+oUFakBUsPLKa5We59TUwf2sLgkAAAB9iEcB0aWXXqqtW7fKMAx3SHTkyBG98847ra5rCYVarjEMQ5dffrknUwMAOsk0Tb2x+bAk6aLJAy2uBl0xJilSeeW12pnr1MmpMTIMw+qSAAAA0Ed4tMXs1ltvVVpamvv10UHR0Y+W8RYjR47Urbfe6snUAIBO2pxVpoMl1QoNtGvuGLaX+aIRCRFy2A2V1zQop6zG6nIAAADQh3gUEAUHB+vjjz/W6NGj3auEpG+Dou8GQ6ZpavTo0VqxYoWCgtjaAAA9qWX10PyxAxQa6NECUlgkMMCmtESaVQMAAMD7PAqIJGngwIHavHmz7rvvPiUmJrZZPdTySEhI0H333afNmze7+xMBAHpGXWOT+/Qytpf5tpZm1XsKK1XX0GRxNQAAAOgrvPIRcmBgoH7729/q7rvv1tdff60tW7aopKREkhQbG6uJEydq0qRJstvt3pgOANBJn+4qlLO2
UQOigjV9aKzV5cADiZFBig0LVElVvTILKjQ+OdrqkgAAANAHeHWPgd1u18knn6yTTz7Zm28LAPDQfzbnSJIunDRQdhuNjX2ZYRg6KSlSa/YUKz3PSUAEAAAAr/B4ixkAoHcrqazTqsxCSdJFk9he1heM6h8hmyEVOOtUUllndTkAAADoAwiIAKCPe2drrhpdpsYnR2nENw2O4dtCAwM0ODZMkrQrr8LiagAAANAXdGiL2dChQ93Pr776av3mN79pM95ZhmFo3759Xb4fANAxb3zdvL2M1UN9y0lJkdpfXKVd+U6dNixWNrYOAgAAwAMdCogOHjwoqTnUKS0tbTVuGEarI+47yjD4RRYAutvewgptO1yuAJuh8yYkWV0OvGhwbJhCHHZV1zfpUGm1hsSFnfCeXbt29UBlnomLi9OgQYOsLgMAAMDvdLhJ9fECnc6GPV0JlAAAnffGN82pz0xLUGx4kMXVwJvsNkNp/SO0JbtM6XnO4wZEztIiSdKiRYt6qrwuCwkNVcauXYREAAAAPcyrp5gBAHoPl8vUf7/ZXnbxZLaX9UUnDYjUluwyHSiqUk1Dk0Ic9navq6l0SpIW3nCP0sZP6ckSO6Uga59eefSXKi4uJiACAADoYR0OiI616ofVQADQO63ZW6y88lpFBgforNEJVpeDbhAfEaT48CAVVdZpd36FJqREH/f62KRUJY8Y0zPFAQAAwKd0KCByuVydGgcAWO8fG7IkSRdNTlZQQPsrS+D7Rg+IUNGeOqXnOU8YEAEAAADHwjH3ANAHFTpr9fGuAknSFdPYqtOXjeofKZshFVbUqbiyzupyAAAA4KMIiACgD/r3V4fV6DI1JTVGaf0jrC4H3Sgk0O5uUJ2e57S4GgAAAPgqjwIiu93uftx2220nvP6NN97QzTff7H4AALzP5TL12sZsSdLlU1MsrgY94aQBkZKkjLwKNbnoDQgAAIDO8+gUs5YG1R095n7NmjV65pln3K+feuopT6YHALRj7b4SZZVWKyI4QOeOT7K6HPSA1NgwhTjsqmloUlZp9XGPvAcAAADa4/EWs46GQy1M0+TkMwDoRv/4srk59fcnDVRIIM2p/YHdZigtsXkrYUY+28wAAADQeT3ag8jp5JdWAOhOxZV1+ig9X5J0+VSaU/uTtAHNAdG+oirVNTZZXA0AAAB8TY8ERKZpaufOnfrggw/cK446u/IIAHBi//nqsBqaTE1IidZJSZFWl4MelBgRpJhQh5pcpvYVVlldDgAAAHxMpwKio5tS2+3N2xZatow9+eSTbb7e8ggICND48eOVn5/vfq/w8HDvficA4OdM03RvL/vhNJpT+xvDMDSq/zfNqtlmBgAAgE7qVEDUEga110fou1873sMwDI0YMcKr3wgA+Lt1+0t0sKRa4UE0p/ZXaf2bt5llH6lRRW2DxdUAAADAl3T6FLOjt4YdHRJ1dsvYhRde2NmpAfiBrKwsFRcXW11Gh8TFxWnQoN7T5+cfXzYfbX/BxCSFBXl0SCV8VFSIQ0lRwcotr9XugkpNSY2xuiQAAAD4iB77C+LoMGnatGm67bbbempqAD4iKytLo0aPVk11tdWldEhIaKgydu3qFSFRfnmt3t+eJ0m6Ypr19cA6owZEKre8Vhn5TgIiAAAAdFinAqJBgwa1Wil06NAh9+vw8HD169ev3fvsdrvCw8M1YsQILViwQIsWLZLD4fCgbAB9UXFxsWqqq/WjOx9T4qBhVpdzXAVZ+/TKo79UcXFxrwiIXl53UI0uU9OG9NPYgVFWlwMLjUgI1+rMIhVX1quook7xEUFWlwQAAAAf0KmA6ODBg61e22zftjC65ppr9Pjjj3ulKAD+LXHQMCWPGGN1GT6jur5Rr25obk597RlDLK4GVgt22DU4LlT7iqqUmV9BQAQAAIAO8fiY+/YaVgMAes5/vjqs8poGpcaG6uzRiVaXg17AfZpZgVMufkYDAACgAzzqQbRy5Ur38+TkZI+LAQB0
jstlaskXByVJV58+RHZb5w4MQN80OC5UQQE2VdU16fCRGqvLAQAAgA/wKCCaOXNmu+NHjhxxn0IUFxenmBiaZAJAd/g0o1AHiqsUGRygH0whqEezAJtNIxLDtSPHqYx8p2KtLggAAAC9nsdbzFrs3LlTN954o1JTUxUXF6dRo0Zp1KhRiouLU2pqqn76059qx44d3poOACDpxc8PSJKuOGUQR9ujlZZtZnsLK9UkVpYBAADg+LwSEN1zzz2aNGmSnn/+eWVnZ7v7ErU8srOz9de//lWTJk3SXXfd5Y0pAcDv7cwt17r9JbLbDF156mCry0EvkxQVrMjgADU0mSpVhNXlAAAAoJfzOCC6/vrr9cgjj6ixsVGmacowjHYfpmmqqalJf/jDH3TNNdd4o3YA8Gstq4cWjhugpOgQi6tBb2MYhnsVUaGiLK4GAAAAvZ1HAdHbb7+tF154QZLcQdCxHB0UvfTSS3rnnXc8mRoA/Fqhs1bvbM2VJF07g6Pt0b5R/ZtXDh1RmGyhhEQAAAA4No8ComeffbbV65YtZXa7Xf3791f//v1lt9vd40d75plnPJkaAPza0rUH1dBkaurgGI1Pjra6HPRSMWGBSowMkmQobNQMq8sBAABAL+ZRQPTVV1+5VwVJ0pgxY/Tuu++qqqpKOTk5ysnJUVVVld59912NGzfOvQXNNE1t2rTJK98AAPibkso6vbz2oCTphu8Ns7YY9Hot28zCxpxlcSUAAADozTwKiKqqqtzP4+LitGbNGs2fP18Oh8M97nA4NH/+fK1atUrx8fHu8ZqaGk+mBgC/9dxn+1Vd36TxyVE6e3SC1eWglxuZGC7JVFDSSFW77FaXAwAAgF7Ko4AoOTnZvSpowYIFio6OPua1MTExWrBggXu1UXJysidTA4BfKqyo1d/WHZQk3XrOyOP2fgMkKTQwQDFq/kCnoDHU4moAAADQW3kUEM2bN8/9PCAg4ITXt1zTEigBADrnz6v2qbbBpcmDonXmyPgT3wBISlC5JKmwKaRNT0AAAABA8jAguu222xQeHi7TNPXhhx+qtrb2mNfW1NToww8/lCRFR0frl7/8pSdTt1JdXa1nn31Wc+bMUUpKioKDgxUXF6eJEyfqzjvvVEZGhtfm6o45t2/frl//+teaMWOG+vfvr5CQEDkcDvXr108TJ07U1VdfrXfffZdf6gE/l19eq1c2ZEmSbjsnjdVD6LB+qpCrrlq1ZoDyyo/9sxoAAAD+y6OAaPDgwVq2bJkCAwOVk5OjhQsXateuXW2uS09P18KFC3X48GFFRETo9ddfV1JSkidTu61atUqjR4/W4sWLtWLFCh0+fFh1dXUqKSnR1q1b9Yc//EHjx4/Xb3/7W68FLN6as7GxUT/96U81YcIEPfroo/r8889VUFCg2tpaNTY26siRI9q6dauWLl2qc889V9OnT1dOTo5XvgcAvueZlXtV3+jStCH9dPrwWKvLgQ+xy1T17i8kSbvynRZXAwAAgN7oxPvCjuOss5pPRImJiVFBQYFWrVqlsWPHKjk5WSkpKZKk7OxsHT58WFLz1rKYmBg98MADeuCBB9p9T8Mw9Mknn3Ro/pUrV2rhwoUnbHjd0NCgBx98UEeOHNGf/vSnjn573T7nrbfeqr/85S8dnvvLL7/UvHnz9NVXXykwMLBTdQPwbTllNfrnxpbVQ/QeQudV7Vyl8HHnaE9BpWaOjFeAzaPPiAAAANDHeBQQrVq1yv1HytHH3R8dCh29gsYwDGVnZys7O7vd92tpeN0RTqdTixYtahPUpKWlaebMmcrKytJHH30kl8vl/tozzzyjOXPm6Pzzz+/4N9lNcxYWFurPf/5zmzni4+M1Z84cBQYGauXKlTp48GCrr+/YsUP//e9/ddlll3XpewDgm/706R41NJk6fXispg9l9RA6rzZruwKNJtU1SgeLqzU8IdzqkgAAANCLeOXjQ9M03eFOy+N4Y+09OuuPf/yjcnNzW43Nnj1b
27dv13PPPaf3339fL7zwQpv7brvtti5vNfPmnBs2bFBTU1OrsSFDhmj37t1atmyZlixZoszMTE2fPr3N+23YsKFL9QPwTQeLq/TvTc2h+62zR1pcDXyW6VKCvfkDjsz8CouLAQAAQG/jlYCoJQDq6Fh7j84wTVMvvfRSm/FHHnlEDofD/fqqq67SmDFjWl2zb98+rV69ulPzdcecdXV1bd7rwgsvVHR0tPt1YGCgLr/88jbXfTdYAtB3maap+97ZqUaXqZkj43Xy4H5WlwQflhhQLUk6UFKlukZ+lgAAAOBbHgdEx1sV1NlHR+3YsUNZWVmtxmJiYjRlypQ2186ePbvN2PLlyzv9fXp7zpEj264CKCgo6NDY6NGjT1gvgL7ho/QCrcwsksNu6H/PO8nqcuDjwoxG9QsNVJPL1L6iKqvLAQAAQC/iUQ+iAwcOeKuOTtm8eXObsbS0tHavbS9M+frrry2fc/z48TrttNO0du1a99hrr72m008/XZdddpkcDoc++OADPfXUU63uS0hI0KJFizpdPwDfU13fqPvfSZckXf+9oRoWT88YeMYwpLT+EVq3v0SZ+RU6aUCk1SUBAACgl/AoIEpNTfVWHZ2yd+/eNmOJiYntXpuQkNCh+62Y8+9//7vmz5+v3bt3S2reOrZ48WItXry43fcdMGCA3nnnHYWHH/+PxLq6ulZb2JxOjjQGfNGfPt2rnLIaDYwO0c9njbC6HPQRIxPDtW5/ibJLq1VV16iwII9+FQAAAEAf4ZNn3JaXl7cZCwsLa/fa0NDQDt1vxZxDhw7Vxo0b9Yc//OGEoc/dd9+tjIyMdre0fdfDDz+sqKgo9yMlJeWE9wDoXfYWVur5NfslSfeed5JCAu0WV4S+Ijo0UImRQTIl7SmstLocAAAA9BLdEhBVV1frwIED2r59u/Ly8rrl/b8rIKD9T0CPbiDdorKy878Qd9ecy5cv19///vcT1vR///d/uuWWW1RVdeKeEXfddZfKy8vdj+zs7BPeA6D3ME1T//vWDjU0mTp7VILOOan91YpAV6UlRkiSdhdwmhkAAACaeW1dudPp1FNPPaXXX39dO3bscDed/sUvfqH/+7//08MPP+w+fWvChAm64IILujxXeyt0Ghoa2r22vfETrdbpqTnvuOMO/fGPf2w1lpycrBkzZigoKEjr169XRkaGJKm+vl5Lly7V1q1btXr16uN+D0FBQQoKCjru9wOg93pnW57W7itRUIBNvzt/TKdPegROZGRihNbsKVZeea3KaxoUFdL2gw0AAAD4F68ERCtXrtQVV1yhoqKiVqeRtfxRY7PZtH79er333nuSpKSkJJ1//vld/qMnKiqqzdixVta0N97e/T0951tvvdUmHFq4cKH+/e9/KyQkRJLkcrl0880365lnnnFfs3nzZj366KN64IEHOv09AOj9Sirr9MDy5sbUi2cNV0q/tuE04KmwoAAl9wtRdmmNMgsqNG1wP6tLAgAAgMU83mK2Zs0azZs3T4WFhe5wqL3g58c//rH7OPvc3FytWbOmy3MOHz68zVh7x8FLUmFhYYfu7+k5lyxZ0uaaRx55xB0OSc3B2iOPPNLmn+e///3vDtUMwLe4XKZu/ddWFVXUaVh8mK7/3lCrS0If5t5mls82MwAAAHgYENXW1uqKK65QQ0ODDMNwP9ozb948ORwO99dXrFjR5Xnba9ScmZnZ7rW7du1qMzZ58mTL52zv3qFD2/4xGB4erri4uFZjBw4cOG6tAHzTn1fv02e7ixTssOnZH01RsIPG1Og+w+PDZTcMlVTVq7iy7sQ3AAAAoE/zKCBasmSJcnNz3aGPaZoKDg5utc2sRUREhEaPHu3+2saNG7s875gxY5SamtpqrKysTJs2bWpz7ccff9xmbOHChZbP2V4j6/aCn4qKChUXF7caO3qVEYC+YcP+Ev3xo+bg+P7zxyqtf4TFFaGvC3LYNTiueQtjJquIAAAA/J5HAdE777wjSe7Q
595773Uf597eSqJRo0a5r9+9e3eX5zUMQ1deeWWb8bvvvrtVg+ilS5cqPT291TXDhg3TzJkzW42deeaZrVZAGYahgwcPduuc7W1Zu/vuu1VbW+t+7XK5dNddd7UJ3EaMGNHmXgC+q7iyTjf942u5TOmiyQN1ycnJVpcEP9GyzSyzoKLdD3cAAADgPzxqUr1t2zYZhiHTNDV+/Hjde++9x72+X79vm2B+d1VMZ91+++164YUXlJub6x5bsWKFxo0bp5kzZyo7O1sffvhhm/sef/zxLjfH9uacF110kd58881WY2+//bZGjhyp733ve3I4HNqwYUO729UuueSSLtUPoPdpcpm69bUtKqyo0/CEcD144VhOLUOPGRIXpkC7TRW1jcorr1VSNCtUAQAA/JVHAVFJSYmk5tU1U6dOPeH19fX17udHr5TpisjISC1btkwLFy5UTU2NezwzM/OYvYEWL16s888/v1fM+aMf/Uh/+ctftHbt2lbj2dnZeuWVV45Zw+jRo3XTTTd18TsA0Ns8+fFurdlT/E3fockKDfTK4ZJAhwTYbRoWH6Zd+RXKzK8gIAIAAPBjHm0xO7oXzrGOfD/aoUOH3M8jIyM9mVqSNGvWLL377rtKSUk57nUOh0P33HOPnn766V4zp81m03vvvdepwOqss87Sp59+Sg8ioI/4y+p9eurTvZKkBy4Yq5GJ9B1Cz2vpd7WnsFJNLraZAQAA+CuPPqpOTEyU0+mUaZpavXq1GhsbFRDQ/lvm5uZq9erV7i1pSUlJnkztNmvWLGVkZGjp0qV68803lZ6eruLiYoWHhys5OVlz5szRNddc4+5/1JvmjIqK0ltvvaX169frn//8pzZs2KC9e/e6/5lGRkZq8ODBmjp1qi699FLNmjXLa98DAGu9sGa/Hnk/Q5J02zkjdcnJxw+dge6SEhOqEIddNQ1Nyj5SrcGxYVaXBAAAAAt4FBBNmzZNu3fvlmEYysvL0zXXXNPuipmcnBxdfPHFamxslNS8Je2UU07xZOpWQkNDtXjxYi1evLhL969atarH5zza9OnTNX36dI/fB/B1LtOULSRS5fWGXKXVqq5rVG2jS4EBNgU7bApx2BUcYFdokF1BAb57BPzSLw7owXeb+4vdfPYI3Xw2jedhHZvN0MjEcG09XK7M/AoCIgAAAD/lUUB0wQUXaNmyZZKaTyZbtmyZ/v3vf7tfG4ahpUuX6qmnnnK/bjkl5fvf/76HpQPoC7JLq7Uqs1CrMov0+Z4ipdz8qj7Ol5Sfc9z7okMcSogMUmJksBIjgpUQGSSH3aNdsz3ib+sO6r53mk86XDxrmG6d3TPhUFZWlseHA/SE9hrzo/ul9Y/Q1sPl2ldUqYYml0/8uwQAAADv8igguuiii5SWluZeRWSaZqvm06Zpuo+9l+Q+Qn7MmDFasGCBJ1MD8GHFlXVa+sUBvb8jX/uL2vYvC7SZiggJUmhQ84qh+kaXahqaVNvQpNoGl+qbXCqraVBZTYN2F1RKkuw2Q8kxIRoSG6bBcWGKCnH09Ld1XI1NLj316V499ckeSdKNM4fpjjlpPXJiWVZWlkaNHq2a6upun8tbKisrrS7Br/SPDFZkcICctY06WFylEfTDAgAA8DseBUSGYegf//iHTj/9dNXW1rb6Q6dlpdB3x0JCQtyrjgD4l8KKWv119X4t23BItQ0uSc3BzpTUGJ2ZFq/EphL9YM7puu3pfyt5xMhjvk9NfZMKK2pV4KxTYUWt8p21qqpr0qGSah0qqZZ2F6lfaKCGJYRpREKE4sIDLT06Pru0Wre8tkVfHToiSbr+e0N157yeCYckqbi4WDXV1frRnY8pcdCwHpmzq3Z9uVrvv/ykxyddonMMw9DIxAhtOnREmQUVBEQAAAB+yOPzlCdOnKiPP/5YF110kQoKCiR9u1KoRUtYlJiYqDfeeEPjx4/3dFoAPqSook5/XrVPr2w4pLrG5mBoQnKUrp0xVDPT4hUZ3LzaZ/Nm
p+RqOuH7hQTalRobptRveqWYpqnSqnodKKnSweJq5ZbXqLS6XqUH67Xx4BFFhzg0IjHckrDorS05+s1/d6iirlERQQF68PtjdcHEgT02/9ESBw1T8ogxlszdUQVZ+6wuwW+l9W8OiA4WV6u2oUnBDt/t8wUAAIDO8zggkqRTTz1Ve/fu1TPPPKP//Oc/+uqrr+RyNf8RaLPZNGXKFF100UX6+c9/rvDwcG9MCcBHrEgv0K9e36oj1Q2SpEmDovWLs0do5sh4rwU1hmEoNjxIseFBOjm1n2obmnSwpEp7Cyt1sKRaZTUN2njwiDYePKKoEIdGJIRrZGL3hkWHSqr0+IrdemtLriRpSmqM/t9lE5XSL7Rb5gM8FRcepNjwQJVU1mtvUaXGJkVZXRIAAAB6kFcCIkkKCwvTr371K/3qV7+Sy+VSSUmJJCk2NlY2G80uAX9TU9+kh95L17L1WZKk0QMiddf8UZoxIq7bV/AEO+wa1T9So/pHqr7RpQPFVdpTWKGDJdUqr2nQpkNHtOlQc1g0PCFcg/qFKikqWAEeNuY1TVNfHijVi58f0IpdBTJNyWY0n1T281nDPX5/oLulJUZobWWJMvMrCIgAAAD8jNcCoqPZbDbFx8d3x1sD8AHpuU7d/M+vtbewudHwdTOG6I65aZYcTR8YYFNa/wil9Y9QfaNLB0uqtKegUgdKqlRe06CvDh3RV4eOKMBmKCk6RIP6hWpAVLD6hQV2aItNfZOpLdll2ppdpte/OqztOd825p+VFq+bzh6hyYNiuvNbBLxmZGKE1u4r0eEjNaqqa1RYULf8mgAAAIBeqMu/+WVmZmrt2rUqKipScHCwhg4dqlmzZiksLMyb9QHwMf/9+rDufH276ptcSogI0h8vnaAZI3pHYBwYYNPIxAiNTPw2LDpYXKWs0mpV1Tcpq7RaWaXfnvQV4rCrX1igYkId7tU/LWufjpTaNeAnT+pHb+Srycx33xMUYNPFU5J19elDNDyBLbXwLVEhDg2IClZeea12F1RoEuEmAACA3+h0QLRz50799Kc/1RdffNHmayEhIbrtttv0u9/9jm1lgB/627qD+t+3dkqSZo9O0KMXj1dseJDFVbXv6LDINE2VVNUrq7Ra2aXVKq6sV2Vdo2oampRTVqOcspp23sGuwMRhajKl2LBAjUuO0ilDYnXZ1BT1Cwvs8e8H8Ja0xAjlldcqk4AIAADAr3QqINq8ebNmz56t8vJy98lkR6uurtZDDz2k7du367///a/XigTQu5mmqWdX7dNjH2ZKkq4+fYh+s3C0bDbrjpbvDMMwFBcepLjwIPd2sPpGl45U16u0ql5lNQ1yuVr/N6/qSLFWv/SI3lzypM45/eQePRkN6E4jEsO1ek+RCpx1OlJdr5hQAk8AAAB/0OGAqLGxUYsWLVJZWVmbY+yPZpqm3n77bT3zzDNavHix1woF0DuZpqlH3s/Qc5/tlyT94uwRumX2CJ8PTAIDbEqMDFZiZHC7Xz+8p0Af7FmnuFC7z3+vwNFCAwM0KCZUh0qrtbugQqcMibW6JAAAAPSADu8De/PNN5WRkXHCP4QMw5BpmvrDH/7Q7iojAH2Hy2Xqnjd3uMOh3ywcrVvPGUlgAvi4tP4RkqTM/Ap+lgMAAPiJTgVERzNNU6Zpql+/fnI4HG1+gTx8+LA2bdrklSIB9E6PfpihVzdkyTCkRy8ep2tnDLW6JABeMDQ+THaboSPVDSqqrLO6HAAAAPSADgdEX331lXt1kCRddNFFysnJUVFRkaqrq/Xf//5XcXFxbe4B0De9uiFLz61uXjn0fz+YoMumDrK4IgDeEhRg19C45lNJM/MrLK4GAAAAPaHDAVF+/rfHOKempurVV1/VgAEDmt/EZtMFF1ygJ554QqZpureXFBQUeLlcAL3BZ7uL9Nu3dkiSbp09UhdPSba4IgDeNjKxeZvZ7oJKtpkBAAD4gQ4HRE6nU1Jzj6HTTz9dgYFtTzWZO3duq9cV
FXzqCPQ1mfkV+tkrm9XkMnXRpIG6+ezhVpcEoBsMjg1VYIBNlXWNyi2rtbocAAAAdLMOB0RHf3oYHx/f7jWxsa1POnG5XF0sC0BvVOis1dUvbVRlXaNOGdJPD188jobUQB8VYLdpeHy4JCmjwGlxNQAAAOhuHQ6IAPi3usYmXfe3Tcopq9HQuDA99z9TFBRgt7osAN2o5TSzvQWVanKxzQwAAKAvC+jKTU6nU1lZWR5fN2gQTW0BX/HYB5naerhc0aEOLb1qqqJD224zBdC3JMeEKDTQrur6Jh0qrdLQuHCrSwIAAEA36XRAZJqmli5dqqVLlx73GknHvc4wDDU2NnZ2egAWWJVZqBc+PyCp+cSy1NgwiysC0BNshqGRiRHakl2m3fmVBEQAAAB9WJe2mJmmecxHR6/jRBTANxRV1OmOf2+VJF15aqpmn5RocUUAelLaN6eZ7SuqVEMTvQUBAAD6qi4FRIZhHPPRkesA+AaXy9Tt/96q4sp6jeofobsWjLa6JAA9LDEySFEhDjW6TO0vqrK6HAAAAHSTTgdEJwp4CIKAvmPJFwf02e4iBQXY9PQVkxTsoCk14G8Mw3CvIsosqLC4GgAAAHSXTgVEJ9oy1tEHgN5v++FyPfpBhiTpt+eepBHf/IEIwP+0nGZ2qKRKNQ1NFlcDAACA7tDhJtUHDhzozjoA9CL1jS7d/u8tamgyNXdMon50CicOAv6sX1ig4sODVFRZp72FlRo3MMrqkgAAAOBlHQ6IUlNTu7MOAL3IXz/bp90FlYoNC9QjF41nyygApfWPUNHeOmXmVxAQAQAA9EFdalINoO86UFylpz7dK6l5a1lMWKDFFQHoDUYmNh9xn1NWo4raBourAQAAgLcREAFwM01T9/x3u+obXZoxIk4XTEyyuiQAvUREsENJ0cGSpN0FlRZXAwAAAG8jIALg9sbmHK3dV6KgAJsevHAsW8sAtNJymtluTjMDAADocwiIAEiSSqvq9eC76ZKkX8weodTYMIsrAtDbjEiIkM2QCivqVFpVb3U5AAAA8KION6kG4NuysrJUXFx8zK8//WWZjlQ3KDUqQCeHl2vz5s09WF2zXbt29ficADouJNCuQf1CdbCkWpkFFTp1aKzVJQEAAMBLCIgAP5CVlaVRo0erprq63a8HpYxT/x8+LNN0af2ffqFT7s7s4Qpbq6ykvwnQW6X1j2gOiPIrNH1IP7aiAgAA9BEERIAfKC4uVk11tX5052NKHDSs1ddMU/okP0DlDdKwCFM/uOdhi6qUdn25Wu+//KRqa2stqwHA8Q2NC1eArVDlNQ0qrKhTYmSw1SUBAADACwiIAD+SOGiYkkeMaTWWnutUeUOBAgNsmj1pqEIC7RZVJxVk7bNsbgAdExhg09D4MO0uqFRmfgUBEQAAQB9Bk2rAjzU0ubR2f3NfommD+1kaDgHwHUefZuYyTYurAQAAgDcQEAF+bPOhI6qqa1JkcIAmpERZXQ4AH5EaG6agAJuq6puUc6TG6nIAAADgBR3aYtavXz/385/+9Kd66KGHJEn333+/e3z69OmaM2eOl8sD0F2q6hr1VdYRSdLpw+MUYCMvBtAxdpuhEQnh2pHrVEZ+hVL6hVpdEgAAADzUoYCorKxMkmQYhmpqvv2k8He/+5379JJf/OIXBESAD1m3v0QNTab6RwZrREK41eUA8DGj+kdqR65TewsrdWZavBx2QmYAAABf1uHf5o51jK1J7wHA5xRV1GlnrlOSNGNEHMdUA+i0pOhgRQYHqL7Jpf1FVVaXAwAAAA91+uO+goKCVq/5wxLwPZ/vbW5MPSIhXEnRIRZXA8AXGYahUf0jJUm78p0WVwMAAABPdWiLWWhoqGpqamSapt588039/ve/V3Jycqtr0tPT9be//a1Tk//4xz/u1PUAPJdVWq2s0mrZjObeQwDQVaMGROjLg6XKKqlWVV2jwoI69GsFAAAAeqEO/SY3cOBA7d27192D6Le//a37ay1b
zFasWKEVK1Z0anICIqBnmaa0fn+JJGncwChFhTgsrsj37dq1y+oSTsgXaoRvigkN1ICoYOWV1yqzoEKTB8VYXRIAAAC6qEMB0RlnnKE9e/bIMAwZhtFu36HO9iJiaxrQ8wprDeWV18puM3Ty4H4nvgHH5CwtkiQtWrTI4ko6rrKy0uoS0AeN6h+hvPJa7cpzEhABAAD4sA4FRDfddJP+/ve/q6mpSdK34c7RoVBnAh8aWwPWSC+3S2pePRTOVhCP1FQ291xZeMM9Shs/xeJqjm/Xl6v1/stPqra21upS0AeNTIzQ6t1FKq6sV1FFneIjgqwuCQAAAF3Qob8QJ06cqCVLlmjx4sWqqKho9xpCH6B3Cx4yWaX1NgXYDJ2cyqf83hKblKrkEWOsLuO4CrL2WV0C+rBgh11D4sK0r6hKGflOxUfEW10SAAAAuqDDSwgWLVqkc889Vx999JEOHDig2tpa3Xfffe6VQ6eccormzp3bbYUC6DrTNBV9xo8kSeOSo2gkC8CrRg+I/CYgqtDpw+Jks7GNHAAAwNd06q/E6OhoXXrppe7X9913n/v59OnTde+993qvMgBeszm/TkFJabIbpqbQIwSAlw2ODVOww6bq+iZlH6lWamyY1SUBAACgkzxaRjBo0CD3CqJ+/Wh4C/RGpmnqnzuamxMPDXexegiA19lthkYmRmjb4XLtyqsgIAIAAPBBHv2lePDgQS+VAaC7fJpRqH1HGuSqr9XISJvV5QDoo0b3j9S2w+XaV1SpusYmBQXYrS4JAAAAneD1pQT5+fnauHGjCgsLJUkJCQmaOnWq+vfv7+2pAJyAaZp68pM9kqSKzcsVPPx8iysC0FclRgYpJtShI9UN2ltYqTFJUVaXBAAAgE7wWkD0wQcf6He/+502btzY7tenTZum3/3udzSyBnrQF3tLtO1wuYLshpxfviFdSkAEoHsYhqGTBkTqi30l2pnrJCACAADwMV7Zb3LXXXdp4cKF2rhxo0zTbPexYcMGLViwQHfffbc3pgTQAc991ny8+eyhIXLVOC2uBkBfN3pApAxDyiuv1ZGqeqvLAQAAQCd4HBA99dRTevTRR91BkGEYx3yYpqlHH31UTz/9tDdqB3AcO3LKtWZPsew2Q+eNpGEsgO4XFhSgwd80qN6ZRygNAADgSzwKiIqLi3XXXXe1CoEktVk91KIlJLrrrrtUUlLiWeUAjuuvn+2XJJ07foASwji5DEDPOGlApCRpV55TLpd5gqsBAADQW3gUEL3wwguqqalxvzZNUw6HQ6effrouv/xyXX755Tr99NPlcDhaBUU1NTV6/vnnPZkawHFkl1br3e15kqTrvzfU4moA+JMhcWEKcdhVXd+kg6VVVpcDAACADvIoIPrkk08kyR3+XHLJJTp8+LDWrFmjV199Va+++qrWrFmjnJwc/eAHP3BvQTv6XgDe9+LnB9TkMjVjRByNYgH0KLvN0KgBEZKk9Fy2mQEAAPgKjwKi9PR0d+AzbNgwvfrqq4qLi2tzXWxsrF599VUNGzZMUnOgtGvXLk+mBnAMpVX1+ufGLEnSjTOHWVwNAH/Uss3sQHGVqusbLa4GAAAAHeFRQFRaWiqpubfQWWedJbvdfsxrAwICdNZZZ7lXG7XcC8C7/rbuoGobXBo7MFKnDYu1uhwAfiguPEiJkUFymVJGfoXV5QAAAKADPAqIAgK+bXxbW1t7wuvr6urcz48XJgHompr6Jr289qAk6YbvDXOv8AOAnjZmQPP21p25zlZ9CAEAANA7eRQQtWwnM01T77//vpzOY/caKC8v13vvvef+g7W9rWgAPPPvr7J1pLpBKf1CNH9sf6vLAeDHRvYPl91mqLSqXgXOuhPfAAAAAEt5FBBNmjTJ3Xi6uLhYZ511lr744os2133xxRc666yzVFxc7L5+0qRJnkwN4DtcLlNLPj8gSbr2jKEKsHv0rzcAeCQowK4RCeGSpJ155RZXAwAAgBMJOPElxzZ//ny9
+eabkppXEW3evFnf+973FBERoeTkZEnS4cOHVVHR3H/AMAz3MvMFCxZ4MjWA71i9p0gHS6oVERygH0xJtrocANBJAyKVkV+h3fmVmjE8XoEBBNcAAAC9lUe/qf3P//yPEhISJMm9dcw0TTmdTqWnpys9PV1OZ3PvgaP7DyQkJOh//ud/PJkawHe89MVBSdKlJ6coLMij7BcAvCI5JkRRIQ7VN7m0u4Bm1QAAAL2ZRwFRcHCwlixZ4n5tGIb70d6YaZqy2WxaunSpgoKCPJkawFH2F1Vq9e4iGYb041NTrS4HACQ1/w4wbmBzs+rtOWwzAwAA6M08Xuu9YMECLVu2TEFBQa1WCX03KDJNU8HBwVq2bJnmzZvn6bQAjvK3dYckSbPSEpQaG2ZxNQDwrZMGRMpuGCqsqFOB88QnngIAAMAaXmkGcMUVVyg9PV3XXnut4uLi3FvKWh5xcXG67rrrlJ6erssvv9wbUwL4RmVdo17/6rAk6SenDba2GAD4jpBAu4YnNjerZhURAABA7+W1RiWDBw/WX//6V/31r3/VwYMHVVRUJEmKj4/X4MGDvTUNgO/4z1eHVVnXqKHxYTpjeJzV5QBAG+MHRikzv0KZ+RWaMTxOQQ671SUBAADgO7qlk+3gwYMJhYAe4HKZenndQUnNq4dsNuP4NwCABQZEBSs2LFAlVfXKyK/QhJRoq0sCAADAd3DeLODD1uwt1v6iKoUHBeiiyRxtD6B3OrpZ9bac8lY9CwEAANA7EBABPuzltQclST+YkqxwjrYH0IuNGhChAJuh0qp65ZbRrBoAAKC3ISACfNTB4iqtzCyUJF1Jc2oAvVxQgF1p/SMk0awaAACgNyIgAnzUq19myTSlM9PiNSSOo+0B9H4t28z2Flaqur7R4moAAABwNAIiwAfVNTa5j7ZfdEqqxdUAQMckRgYrMTJITaap9Dyn1eUAAADgKAREgA/6aGeBSqvq1T8yWGemxVtdDgB0mLtZ9eFyuVw0qwYAAOgtCIgAH/SPL7MkSZdOTVGAnX+NAfiOtMQIBTtsqqht1P7iKqvLAQAAwDf4yxLwMQeKq7R2X4kMQ7psaorV5QBApwTYbe5VRF9nH7G4GgAAALToEwFRdXW1nn32Wc2ZM0cpKSkKDg5WXFycJk6cqDvvvFMZGRk+MefWrVt19913a9q0aRowYIACAwMVHx+vMWPGaOHChXrsscf09ddfe/17gW/558bm1UNnjozXwOgQi6sBgM4bnxwtmyHlltWqwMmR9wAAAL1BgNUFeGrVqlW68sorlZWV1Wq8rq5OJSUl2rp1q5544gndeeeduv/++2UYRq+bs6ysTDfddJNeeeUVmWbrfgzFxcUqLi5Wenq63nvvPcXGxqq4uNjj7wG+qb7Rpdc3NTenvmLaIIurAYCuCQ8K0IjECGXmV2hLdpnmjulvdUkAAAB+z6dXEK1cuVILFixoE9R8V0NDgx588EHddNNNvW7O3NxczZgxQ8uWLWsTDgHftSK9QCVV9UqMDNJZoxKsLgcAumxSSrQkaXdBharqOPIeAADAal5ZQdTY2Kjly5frs88+04EDB1RRUSGXy3XcewzD0CeffNLlOZ1OpxYtWqSamppW42lpaZo5c6aysrL00UcftarjmWee0Zw5c3T++ef3ijmbmpp0ySWXaMeOHW2+NmnSJE2YMEGhoaEqLi7W1q1blZmZ2aW60Xe0NKe+7GSaUwPwbYmRwRoQFay88lptO1yuU4fFWl0SAACAX/M4IFq7dq0WLVqkQ4cOdfge0zQ93ur1xz/+Ubm5ua3GZs+erffee08Oh0OStHTpUl199dWtrrntttt03nnndWl+b8/55JNPau3ata3GhgwZoldffVXTp09vM/+hQ4f01ltvdbpu9A0Hi6v0+d5iGUbz6WUA4OsmpUQrrzxf23PKNXVwjNXlAAAA+DWPliDs3r1bc+fO1aFDh2SaZoce3mCapl566aU244888og7qJGkq666
SmPGjGl1zb59+7R69WrL52xsbNQTTzzRaiw0NFQrVqxoNxySpNTUVN18882drh19wz83ZkuSZo6MV3JMqMXVAIDnhsWHKyI4QDUNTcosqLC6HAAAAL/mUUD00EMPqaqqSlLzlrGOPLxhx44dbXoAxcTEaMqUKW2unT17dpux5cuXWz7nxx9/rMOHD7cau/rqqzVs2LBO14a+r77Rpde/ag6IaE4NoK+w2QxNSI6WJH2dXSZa8QEAAFjHo4Do008/bRX69NQqos2bN7cZS0tLa/fa0aNHtxnrylHx3p5zzZo1ba4555xz9OKLL2rWrFnq16+fAgMDNWDAAJ133nl65ZVX1NTU1Om60TeszCxUcWW94iNoTg2gbxmTFCmH3VBJZb2K6rzzQRIAAAA6z6MeREVFRZK+7Sl0ySWX6Prrr9eAAQMUHBwsu93ulSK/a+/evW3GEhMT2702IaHtH9Pt3d/Tc7YXUv385z9XdnZ2q7H8/HwtX75cy5cv19NPP6033nhDSUlJx621rq5OdXV17tdOp/O416P3e/2r5tVmF00eKAfNqQH0IcEOu04aEKmth8uV6eye3xsAAABwYh4FRP369VNBQYEMw9Cpp56q1157zVt1HVd5eXmbsbCwsHavDQ1t26ulvft7es6WcO1o3w2HvmvDhg2aO3eu1q1bp/Dw8GNe9/DDD+u+++477nvBdxRX1mllRqEk6QeTky2uBgC8b/KgGG3LKVdhrU2B/UdYXQ4AAIBf8mgpwvTp091bxiZMmOCVgjqiurq6zVhAQPtZ19ENpFtUVlZaPmdZWdkx55oxY4auvfbadptV79ixQ48++uhxa73rrrtUXl7ufpwoeELv9ubXOWp0mZqQEq0RiRFWlwMAXhcZ4tCob/77FjX9EourAQAA8E8eBUQ33nij+/mmTZs8Lqaj2luh09DQ0O617Y0fb/VNT80ZGBjY7r1/+MMf9Nlnn+n555/XunXrdPfdd7e55vnnnz9uL6egoCBFRka2esA3mabp3l72gymsHgLQd508uJ8kU6Fppym7vP2frwAAAOg+HgVEc+bM0ZVXXinTNLVp0yb97//+rxobG71V2zFFRUW1GWs5Ta0j4+3d39NzthfaREdH6xe/+EWrsbvvvltBQUGtxgoKCrR///4T1gzftzPXqYz8CgXabTp//PF7TwGAL+sXFqikkOYPP97IaP/nKwAAALqPRz2I/va3v2nGjBl6//33VVhYqIceekhLlizRD37wA6Wmpio2Nva49//4xz/u0rzDhw9vM1ZQUNDutYWFhR26v6fnTE1N1fr161uNDRkypM3KorCwMKWkpLRpcl1YWKhhw4Z1qHb4rpbVQ+eMSVRUaNutiwDQl4yKbFJujU1rsmqUXVqtlH5tV+8CAACge3gUEP3kJz9xH3NvGIZM01Rubq6efvrpDt3f1YBoypQpbcYyMzPbvXbXrl1txiZPnmz5nFOmTPGoqfexGmSj76hvdOmtLTmSpEvYXgbAD8QEmao5sFkhQybruc/26cELx1ldEgAAgN/wynnZpmm6j7pvCYpO9PDEmDFjlJqa2mqsrKys3T5IH3/8cZuxhQsXWj7n3Llz21xz4MAB1dfXtxqrqqpq02Tabrdr8ODBHS0dPurTjEIdqW5QYmSQZoyIt7ocAOgR5ev+JUn616bDKnTWWlwNAACA//BKQNQSDH339bEe3pjvyiuvbDN+9913t2oQvXTpUqWnp7e6ZtiwYZo5c2arsTPPPLNNjQcPHuzWOcePH99mVVJZWZmefPLJVmO///3vVVdX12rsjDPOoPG0H2jZXvb9Scmy2zz/9wYAfEFd9g6lxTpU3+jSi58fsLocAAAAv+FxQNSR1ULeXD3U4vbbb1dSUuumvStWrNC4ceN0ww03aMGCBbr22mvb3Pf44493OaTy9pyPPfZYm7Ff/epX+t73vqfrrrtOp556qn7/+9+3uaa9k83QtxRV1GllZnMvqx9MGWhxNQDQ
sy4e3Xzy57L1h3Skqv4EVwMAAMAbPOpBdO+993qrjk6LjIzUsmXLtHDhQtXU1LjHMzMzj9kbaPHixTr//PN7zZyzZs3SXXfdpYcffrjV+Jo1a7RmzZp277nttts0Z86cLn4H8BVvbclRk8vUxJRoDU+IsLocAOhRUwYE6aQBkUrPc+q5z/br1/NHWV0SAABAn+ezAZHUHLC8++67uvLKK9v06Tmaw+HQr371Kz3wwAO9bs7f//73ioyM1H333afa2mP3WnA4HPrd736nu+66q8u1w3e0bC/7Ac2pAfghwzB0x9yRuvqlTXpp7QFdffpgJUQGW10WAABAn+ZRQNQbzJo1SxkZGVq6dKnefPNNpaenq7i4WOHh4UpOTtacOXN0zTXXaNQo73366O05f/3rX+uyyy7Tiy++qPfff1+HDh1SeXm5oqKiNGLECJ199tm64YYblJKS4rXvAb1XRr5TGfkVCrTbdN74pBPfAAB90Ky0BE1JjdFXh47o6U/36oELx1pdEgAAQJ/m8wGRJIWGhmrx4sVavHhxl+5ftWpVj8/5XUOGDNGDDz6oBx980CvvB9/15te5kqQz0+IVFeqwuBoAsIZhGPrl3DRd/tf1+seXWbpuxlANig21uiwAAIA+yyunmLX44IMPdOONN2rKlClKSUlRSkqKpkyZohtvvFEffPCBN6cC+iSXy9TbW3IkSd+fRHNqAP5t+tBYzRgRp0aXqf/3yW6rywEAAOjTvLKCKDs7W5dddpk2bNggSa1OKsvJydGWLVv0/PPP65RTTtFrr73GVingGL48WKrc8lpFBAdo1qgEq8sBAMv9cm6a1uwp1n+/ztGNM4dpZCKN+wEAALqDxyuIsrKyNG3aNG3YsKHVMfaGYbiPdm8ZX79+vU455RRlZWV5Oi3QJ731zeqh+WP7K9hht7gaALDe+ORozRvTX6YpPf4Rq4gAAAC6i8cB0eWXX66CggJJ34ZCLcFQe2P5+fn64Q9/6Om0QJ9T19ikd7flSZIunMj2MgBocfuckTIM6YOd+dqaXWZ1OQAAAH2SRwHRRx99pPXr17cKhFpWC333Icl93bp16/TRRx95MjXQ56zKLJKztlGJkUE6ZWis1eUAQK8xIjHC3ZftsQ8zW21lBwAAgHd4FBD961//cj83TVNBQUG65ZZbtGLFCmVkZCgjI0MrVqzQLbfcouDg4GPeC+Db7WXnT0iS3Wac4GoA8C+3zh4ph93Q53uLtTKz0OpyAAAA+hyPmlSvW7dOhmHINE0FBgbq008/1fTp01tdM3LkSJ199tm69NJLNXPmTDU2Nso0Ta1bt86jwoG+xFnboI93Nf/BcwHbywCgjZR+obr6jCF6bvV+PbB8l84YHq/AAK8exgoAAODXPPrNKi+vuV+KYRiaN29em3DoaNOnT9f8+fPdy8Jb7gUgfbA9X/WNLg1PCNeYpEirywGAXunns4YrLjxIB4qr9PLag1aXAwAA0Kd4FBBVVVW5nycnJ5/w+oEDv10ZcfS9gL9785vtZd+fNLBVTy8AwLcigh361dw0SdJTn+xRUUWdxRUBAAD0HR4FRFFRUe7nO3fuPOH1R19z9L2AP8svr9W6/SWSmvsPAQCO7QdTkjVuYJQq6hr1x48yrS4HAACgz/AoIEpNTXWfUvbZZ5/p73//+zGvffnll/XZZ5+5V0ekpqZ6MjXQZ7yzNVemKZ2cGqOUfqFWlwMAvZrNZuje806SJL22KVs7csotrggAAKBv8KhJ9YwZM/TVV1+5G1X/5Cc/0csvv6yFCxcqJSVFkpSdna333ntPn376qfs6wzA0Y8YMr3wDgK97Z1uuJOmCiaweAoCOOHlwP50/IUlvb83V/e+k67UbprM9FwAAwEMeBUQ/+clP9P/+3/+TJHf4s3LlSq1cubLNtS3B0NH3Av7uUEmVth0ul82Q5o8bYHU5AOAzfj1/lD5Kz9eXB0u1fFuezmOLLgAAgEc82mI2fvx4XX755e6TyVpC
ovYeLeGQYRi64oorNH78eM+rB3zc8m3Np/mdNixOceFBFlcDAL4jKTpEP505XJL0wPJ0ldc0WFwRAACAb/MoIJKkv/71r5o8eXKrkKi9h9S8imjy5Ml67rnnPJ0W6BNaAqJzx7N6CAA664aZQzU0LkyFFXV69IMMq8sBAADwaR4HROHh4VqzZo2uu+462e32Y64gstvtuv766/XZZ58pLCzMG7UDPm1fUaV25TkVYDM0d0x/q8sBAJ8T7LDr9xeNkyS9uiFLXx4otbgiAAAA3+VRD6IWISEheu6553TPPffonXfe0caNG1VUVCRJio+P19SpU3Xeeedp0KBB3pgO6BOWb21ePXT68DjFhAVaXA0A+KbpQ2N1+dQU/XNjtu56Y5ve+8UMBQXYrS4LAADA53glIGoxaNAgLV682JtvCfRZ725vPr2M7WUA4Jm75o/Wx7sKta+oSs+u3KdbzxlpdUkAAAA+x+MtZgA6b3dBhXYXVCrQbtMctpcBgEeiQh267/wxkqRnV+3VnoIKiysCAADwPQREgAWWb21ePfS9kXGKCnFYXA0A+L4F4/rr7FEJamgy9es3tsvlMq0uCQAAwKd0aIvZ1Vdf7X4+Z84cXX755W3GO8swDL344otdvh/wVaZpHnV6WZLF1QBA32AYhh64cKzWP75aXx06opfXHdRVpw+xuiwAAACf0aGA6KWXXnIfVR8dHe0OiI4e7wzTNAmI4LfS85zaX1yloACbZp+UaHU5ANBnJEWH6NcLRuu3b+7QI+9naMaIOA1PiLC6LAAAAJ/glS1mxzra/lgPwJ+1rB6alZag8CCv9okHAL+36JRB+t7IeNU1unTLa1tU3+iyuiQAAACf4JWAyDCMTj0Af9W8veyb08smcHoZAHibYRh67AfjFR3q0I4cp576ZI/VJQEAAPiEDgdEx1r909nVQ6wggj/bnlOu7NIahTjsOmtUgtXlAECflBgZrN9/f5yk5lPNvjpUanFFAAAAvV+H9resXLnS/Tw5ObndcQAn9u725u1lZ41KUGgg28sAoLssGDdAF00aqDe+ztGtr23V+7+YoTC29QIAABxTh35TmjlzZqfGAbRlmqY+2JEvqfkPFwBA9/rdBWO04UCpskqr9cDydD1y8XirSwIAAOi1vNKDCMCJpec5daikWkEBNp2ZFm91OQDQ50UGO/R/l0yQYUj/3Jitd785JAAAAABtebTW+qyzznI//8EPfqCf/exnx71++/bt2rPn22aRF110kSfTAz6lZfXQmWnxbHMAgB5y6rBY3ThzmP68ap/u/M82jUmK1OC4MKvLAgAA6HU8+it11apV7lPJJk6ceMLrlyxZoqeeekpS8ykjjY2NnkwP+JT3vuk/NH8s28sAoCfdfs5IbTxQqk2Hjmjxq5v1n5+epmCH3eqyAAAAepUe32LGaWbwR3sKKrSvqEqBdpvOGs3pZQDQkwLsNj39w0mKCXVoZ65Tv39vl9UlAQAA9Do9GhDV1tb25HRAr/H+N9vLzhgRp8hgh8XVAID/GRAVoscvmyhJ+tu6Q+5VnQAAAGjWYwFRU1OT1q9f796SBviTb7eX9be4EgDwX7PSEvTTM4dJku58fZsOlVRZXBEAAEDv0akeREc3pf6u119/XVu2bGn3a01NTdq7d6/y8/PdYyEhIZ2ZGvBZB4qrlJFfoQCboXNOSrS6HADwa0f3I/rZK/QjAgAAaNGpgOjoptQtWnoJ5eTkKCcn55j3tlzXcn9ycnKnCgV81fs7mlcPnTosVtGhgRZXAwD+raUf0cKnPtfOXKfu/u92/fGSCV1a4ZyVlaXi4uJuqNL74uLiNGjQIKvLAAAAvViXTjFrr8H0iZpOt/ziZZqmDMPQ7NmzuzI14HNajrfn9DIA6Jhdu7q/ifQvpobrvtWlemNzjvqZFVowIqxT9+fl5ekHl1yi2pqabqrQu0JCQ5WxaxchEQAAOKYuBURHhz3fHeuImJgY/epXv+rK1IBPyS6t
1rbD5bIZ0pwxbC8DgONxlhZJkhYtWtQj80VM/b76nXWNnt9Uqgdvv1F1Oemdfo/v33y/hqSN7YbqvKcga59eefSXKi4uJiACAADH1OmA6FgrhTpybH1sbKwWLFige++9l19Q4Bc+3Nm8emjakH6KCw+yuBoA6N1qKp2SpIU33KO08VO6fT7TlL4sadLh6gAN/vGjOqt/g0I6+JvRri9X6/2Xn1REbH8ljxjTvYUCAAD0gE4FRAcOHHA/N01TQ4cOda8cuuqqq/Tb3/623fvsdrvCw8MVHR3d9UoBH9RyvP2CcWwvA4COik1K7bHQpX+TS69tzFZJVb2+rorUxZOTZbedeFV0Qda+HqgOAACg53QqIEpNTW0z1rJyKDIyst2vA/6qwFmrrw4dkSTNHcPx9gDQGznsNp07foD+uTFbeeW1WpVZqLNGJXSpaTUAAIAv61IPohb33nuv+/n06dM9LgboSz5KL5AkTR4UrcTIYIurAQAcS3RooOaO6a+3t+ZqR65TcRFBmpAcbXVZAAAAPcprARGA1j76pv8Qq4cAoPcbEhem04fH6ou9JVq9u0j9QgOV0i/U6rIAAAB6jEcBUU1NjTZu3Oh+HRERoUmTJrV77ddff62Kigr366lTpyokJMST6YFeq7ymQev2lUiS5hAQAYBPmDIoRsWV9crMr9B72/N0+bRBigpxWF0WAABAj7B5cvPbb7+tWbNmuR+rV68+5rXr1q1rde3bb7/tydRAr7Yyo1CNLlMjE8M1JC7M6nIAAB1gGIZmj0pQYmSQahtdemdrruobXVaXBQAA0CM8Cog+/PBDmaYp0zQVERGhG2644ZjXXnPNNYqOjnY3tf7www89mRro1T5Kb95eNuckVg8BgC8JsNt07rgkhQXaVVJVrw935rt/dwEAAOjLPAqIvvrqK0nNn7jNmDHjuFvGgoKCNGPGDPfrzZs3ezI10GvVNjRpVWaRJPoPAYAvCg8O0Lnjk2S3GdpfXKV1+0usLgkAAKDbeRQQ5efnu4+B7cgR98nJyZIk0zSVl5fnydRAr/X5nmJV1zcpKSpYYwdGWl0OAKAL+kcFa/aoBEnSxoNHlJlfcYI7AAAAfJtHAVFZWZn7eXV19Qmvr6mpafdeoC9xby8b098doAIAfM+oAZGakhojSVqxq0AFzlqLKwIAAOg+HgVEkZHNqyNM09S6detOeP0XX3zh/oO55V6gL2lscunjXYWSpDljEi2uBgDgqdOGxWpwbKiaXKaWb8tTVV2j1SUBAAB0C48CooEDB7qf7969Wy+99NIxr12yZIl2797tfp2UlOTJ1ECvtOnQEZVW1Ss61KFpg/tZXQ4AwEM2w9C8sf0VE+pQZV2jlm/LU6OLk80AAEDf41FAdOqpp8o0TRmGIdM0df311+vuu+/WoUOH3NccOnRId999t2688Ub3dYZh6NRTT/W4eKC3+WhngSTp7FGJCrB79K8XAKCXCAqw67wJSQoKsCnfWatPMwrFuWYAAKCvCfDk5osvvljPPfecpOaTzBobG/Xoo4/q0UcfVWBgoAzDUF1dnSS5g6Gj7wV8XVZWloqLiyU1/3/8na+bTy8bEVLVq07q27Vrl9UlAIBPiwkN1Pyx/fXWllztyqvQELFKFAAA9C0eBUSzZ8/WySef3Oq4e9Ns/kytJRhq0RIOGYahyZMn65xzzvFkasByWVlZGjV6tGq+adDuSBiipKuelquhVj+76CyZjXUneIeeV1lZaXUJAOCzUmPDNGNEnD7bU6wDSlDwkMlWlwQAAOA1HgVEkrRs2TJNnTrV/Yfn8U5tMk1TERERWrZsmafTApYrLi5WTXW1fnTnY0ocNEzpZXbtckrJkYG65Ml/WF1eK7u+XK33X35StbWcwAMAnpiYEq3iynql5zkVf/6vVO2qsrokAAAAr/A4IBo5cqRWr16t888/X4cPH5bUNiRqWVU0cOBAvfXWW0pLS/N0WqDXSBw0TMkjxmj1hkOS6jV2yAAl
D+hdp/QVZO2zugQA6BMMw9CsUfHKzstXRXC4dtQFa0pDk4IcdqtLAwAA8IhXuuhOnDhRmZmZ+sMf/qBTTjlFNptNpmnKNE3ZbDZNmzZNjz32mHbv3q3Jk1mOjb6nvKZBxZX1MgxpcFyY1eUAALpRgM2m0TqsRmeRaswAvb8jXy6TttUAAMC3ebyCqEVISIjuuOMO3XHHHXK5XCopKZEkxcbGymbjNCf0bQeKm7cYJEWFKIRPkQGgzwtUk4reeFADf/KEDpVW64u9xZoxIt7qsgAAALqsW5Ibm82m+Ph4xcfHEw7BL+wrau7BNTSe1UMA4C/qC/YpLbBMkrQ5q0zpeU5rCwIAAPAA6Q3goXqXlFNWI0kayvYyAPArCQG1mja4+cj7T3cVKq+8xuKKAAAAusZrW8wkafv27dq8ebOKi4tVXV3tbk59LP/7v//rzekBS+TX2GSaUmxYoKJDA60uBwDQw6YP7aeSqjrtK6rS8m15unxqiiKCHVaXBQAA0CleCYjeeOMN/frXv9a+fZ07KYmACH1BXk3zqX1DWD0EAH7JMAzNOam//vVVtkoq67V8W54umZKsADsLtQEAgO/w+DeXZ599Vpdccon27dvnPrmsIw+gT7AFKL+m+V+jYfHhFhcDALBKYIBN541PUrDDpsKKOq3YVcDvOwAAwKd4FBDl5OTojjvucP8CZBhGhx5AXxE8aKwaTUOhgXYlRgZZXQ4AwEJRIQ4tHDdANkPaXVCpTYeOWF0SAABAh3kUEL300kuqra11hz4nWjHE6iH0NSHDp0tqbk5N+AkASI4J1cyRzcfdr91Xov3fnHIJAADQ23kUEH322WeS5A59Lr74Ys2ePdv9dcMw9Je//EU/+clP3KuH5s6dq6VLl2rJkiWeTA1YzjRNhY6YJkkawvH2AIBvjE+O1riBUZKkD3bmq6SyzuKKAAAATsyjgCg9Pd29aiI5OVn/+Mc/NGbMmFbXXH/99VqyZIleeeUVmaapjz76SAUFBbryyis9mRqw3IGyRgVEJshumBoUE2p1OQCAXmTmyHglR4eoocnUO9vyVNPQZHVJAAAAx+VRQHTkSPPeesMwdOqppyog4NiHol122WU65ZRTZJqm7r77bn3xxReeTA1YbmNurSQpMdjkpBoAQCt2m6EF4wYoMjhA5TUNem97nppcbLMHAAC9l0d/1TY0NLifx8c377f/bkhUVVXlfj5+/HhJzVtznnrqKU+mBiz3ZU5zQDQgxGVxJQCA3igk0K7zJiTJYTd0+EiN1uwpsrokAACAY/IoIIqJiXE/d7ma/0gOC2vdiyUzM9P9vKSkRFJzQLRu3TpPpgYslVtWowNljTJdTQREAIBjigsP0twx/SVJWw+Xa3tOucUVAQAAtM/jgKilQXVL+JOYmChJ7t5E9913n44cOaKVK1fqvffec48XFhZ6MjVgqY93FUiS6nIyFGS3uBgAQK82LD5cpw6NlSStyixUzpEaiysCAABoy6OAqH///u7nxcXFkqTRo0e7x0zT1PLlyxUXF6fZs2ertrbW/bXo6GhPpgYs9fGu5oCzZu+XFlcCAPAFUwfHaGRCuFym9O72PDlrGk58EwAAQA/yKCCaNGmS+3lGRoYk6bTTTlN4eLik5lVEpmm6Hy2vDcPQ6aef7snUgGWq6hq1fl/zirnqvRssrgYA4AsMw9DskxIVHxGkmoYmvbMtV/WNbFEGAAC9h0cB0dSpUyU1rxTKy8tTenq6AgMDdd1117m3nhmG4X64J7XZdMcdd3gyNWCZNXuKVd/kUv9wuxpLD1tdDgDARzjsNp03foBCHHYVV9bro/R89+9LAAAAVjv2ufQdMHPmTN1+++3u1zU1zXvq77//fq1du1YbNjSvrmgJh0zTlM1m0xNPPKFTTz3Vk6kBy3ya0dx/6OQBwWL9EACgMyKCHTp3/AD9Z/Nh7Suq0oYDpZr+TX8iAAAAK3kUECUlJemxxx5rMx4WFqY1a9boueee09tv
v62srCwFBgZqypQpWrx4saZMmeLJtIBlXC5Tn2Y0H1N8clKQxdUAAHxRUnSIzhqVoI93FWrDgVLFhgVqRGKE1WUBAAA/51FAdNw3DgjQ4sWLtXjx4u6aAuhx23LKVVxZp4igAI2OC7S6HACAjxqTFKXiynptyS7TR+kFig4NVHwEHzwAAADreNSD6KyzznI/zjnnHFVXV3urrk6prq7Ws88+qzlz5iglJUXBwcGKi4vTxIkTdeedd7obaPvKnDfffHOr3k0tj9/97nfe+wbQJZ98c7z990bGy2E3TnA1AADHNmN4nAb1C1Wjy9Q723JVXd9odUkAAMCPebSC6LPPPnM3V5w5c6ZCQ0O9UlRnrFq1SldeeaWysrJajdfV1amkpERbt27VE088oTvvvFP3339/q2bZvXHO9evX65lnnvG4RnSPT7453v6sUQmSCq0tBgDg02w2Q/PH9tc/N2arvKZB727L00WTk2W38QEEAADoeR6tIIqPj3cHRCNHjvRKQZ2xcuVKLViwoE1Q810NDQ168MEHddNNN/XqORsaGnTdddfJ5eLY294ot6xG6XlOGYY0a1SC1eUAAPqAYIdd509IUqDdptzyWq3MLORkMwAAYAmPAqIJEya4n1dVVXlcTGc4nU4tWrTIfXJai7S0NF1//fWaN2+ebLbW394zzzyjt99+u9fO+cgjj2jHjh1drg/d69OM5hVDkwfFqF8Y/YcAAN7RLyxQ88f2lyTtzHVq2+FyiysCAAD+yKOA6Oqrr3Y/X7FiRY/2IPrjH/+o3NzcVmOzZ8/W9u3b9dxzz+n999/XCy+80Oa+2267rcufzHXnnBkZGXrooYfcr2NjOfK2t2npP3T2aFYPAQC8a3BcmM4YHidJWr2nSFml1vR1BAAA/sujgOjSSy/VxRdfLNM0VVRUpB/96EdyOp3equ2YTNPUSy+91Gb8kUcekcPhcL++6qqrNGbMmFbX7Nu3T6tXr+5Vc5qmqeuuu051dXWSmrfu3XXXXZ2uEd2nur5RX+wrkSSdPSrR4moAAH3R5EHRGtU/QqYpvbc9T2XV9VaXBAAA/IhHTar/9re/ad68edqwYYMOHz6st99+W0OHDtUFF1yg8ePHKzo6+rgNmn/84x93ad4dO3a06QEUExOjKVOmtLl29uzZ2rlzZ6ux5cuX68wzz+w1c/7lL3/R559/7n795JNPusMi9A5f7C1RfaNLyTEhGpkYbnU5AIA+yDAMnT0qQUeq61XgrNM7W/N06dRkBQXYrS4NAAD4AY8Cop/85CfuAMgwDJmmqdLS0nZX2rSnqwHR5s2b24ylpaW1e+3o0aPbjH399de9Zs7c3Fz9+te/dr9euHChrrjiig7/M0TP+DTjm+1loxK8chIeAADtCbDbdO74JP1zY5ZKq+v1wY58nTchSTZ+9gAAgG7m0RazFqZpyjRNGYbhDopO9PDE3r1724wlJra/7SchoW2/mPbut2rOxYsXu7flRURE6M9//nOna/uuuro6OZ3OVg90nctluo+3P3s028sAAN0rPChA545Pkt1m6GBJtdZ9s8UZAACgO3klIGoJhr77+lgPT5WXtz3dIywsrN1rQ0NDO3S/FXO+/vrrevPNN92vH374YaWkpHS6tu96+OGHFRUV5X544z392c5cpwor6hQWaNcpQ/tZXQ4AwA/0jwzW7G8ORdh06Igy8vmwBwAAdC+PA6KOrBby5uohSe2elhYQ0P5uuaMbSLeorKy0fM6ysjLddNNN7tennXaafvazn3W6rvbcddddKi8vdz+ys7O98r7+6uNvTi+bMSKePhAAgB4zqn+kpqTGSJI+3lWofGetxRUBAIC+zKMeRPfee6+36uiU9lboNDQ0tHtte+Ph4Z1vMuztOX/5y18qPz9fkhQYGKgXXnjBa71tgoKCFBQU5JX3gvRpRvP2srM43h4A0MNOGxarkso6HSyp1vJtubpi6iCFBXn06xsAAEC7fDIgioqKajNWVVXV
7rXtjbd3f0/OuX79er344ovu17/5zW/abWwN6xU4a7U9p1yGIc1KIyACAPQsm2Fo3tj++tfGwyqtrtfybXm6ePJABdi90iUAAADAzSc/gho+fHibsYKCgnavLSws7ND9PTlnRkaGe6udzWZTfn6+brnlllbXp6ent3mPDz74QGVlZZKk73//+5o5c2ZHy0cXtawempAcrfgIVmUBAHpeUIBd500YoH9uzFa+s1afZBRqzkmJnKoJAAC8qsMB0W233eZ+PnPmTF1wwQX/v707D4+yPPs+/pvJZCb7vpOECGHfQXaURUQEwbrXrYC22kelrn1t7aJSW2ttVerW9rEFH5euigsIKAgUQUB2gQRJWBJIyL5nsk3m/SMQGWZCEkiYzOT7OY45wpxz3fd1TuDmTs65FofXa2trVVtb2/w8JCSkA9JzbdSoUU6xgwcPumyblpbmFBs5cmSX6bOxsVGvvfZam3LYunWrtm7dKklKSUmhQHQRrD21/tB0ppcBANwoLMCsWUPi9cHuE0o/WaHoIItGnlqfCAAAoCO0eXzySy+9pMWLF2vx4sXasGGD0+s/+clPFB4ervDwcEVEdO5OT4MGDVLPnj0dYqWlpdq+fbtT2zVr1jjFZs+e7RF9wr1q6m36IqNQkjStP9vbAwDcKzkiQJf3iZYkbcwoVGZB+zfdAAAAaEmHTmDvyJ3KzsVgMGjevHlO8SeeeMJhgeglS5Y4TdXq3bu308ibKVOmyGAwODyOHj3aqX2i69ucWaia+kYlhPppQHywu9MBAEDDEkM1OKFplPbKfSeVU2p1c0YAAMBbeOwKh48++qgSEhIcYp999pmGDBmie++9V7NmzdL3v/99p+NeeOGF856z31F9zp8/36GY5uqxZMkSp/M8+eSTza+fvWYROt7atG93L2OdBwBAV2AwGDS1X4xSIgNka7Tr4z05Kqmqc3daAADAC3jkItVS0xpHb7/9tmbPni2r9dtPzw4ePNji2kD333+/5s6d61F9wj3sdnvzAtVXML0MANCFGI0GzRoSr/d2Hldeea0+2H1CN1+apECLx/5YBwAAugCPHUEkSVOnTtWKFSuUlJR0zna+vr762c9+ppdfftkj+8TFdyC3XLllNfL39dH43pHuTgcAAAe+PkbNHZagUH9fldc06MM9OapraHR3WgAAwIN5/EdNU6dOVXp6upYsWaIPPvhABw4cUGFhoYKCgpSYmKgZM2bo7rvvVv/+/T26T1xcp6eXTUyNkp+vj5uzAQDAWYDZpO8MT9C/th9XQUWtVnydqznD4mUyevTnfwAAwE08vkAkSQEBAbr//vt1//33n9fx69evv+h9tmb+/PmaP39+p5wbrVt7anoZ29sDALqysACzrh2eoPd2HldWcbVW7TupWYPjZTSydh4AAGgfPmICzpJfUaM92aWSpGn9KRABALq22BA/XTM0QT5GgzILqvRZWl6n7ygLAAC8z3mNIPrPf/6j3bt3O8QyMjIcnk+bNu2c5zAYDFq7du35dA90qvXpBZKkoYmhignxc3M2AAC0LjkiQLMGx2nF17lKP1khXx+jpvaLZhdOAADQZu0uENntdp04cUInTpw4Z5sNGzac83V+YEFXtSYtTxKjhwAAnqVXdJBmDIzTqv0n9fWJMpl9jJqYykYLAACgbc5rBFFbhi231IbCELqymnqbvsgolCRNH8D29gAAz9IvLlj1tkatTc/XjqwS+fgYlMhsMwAA0AbtLhBR4IE323K4SNV1NsWGWDQoIcTd6QAA0G6De4Sq3tao/x4q1LYjxSoLYTdOAADQunYViFjwEN7u81O7l03rH0sxFADgsUYkh8suaeOhQh0s91HY1Lv4OQ4AAJxTmwtETz75ZGfmAbid3W7X2rSmAtEVrD8EAPBwI5PD5WMwaP03BQodc73+uqtcI0eyDiQAAHCNAhFwysG8Cp0otcpiMmpiapS70wEA4IINSwpTeWGudhQZ9UlGtZ5Ytk+//s5g
GY0UiQAAgCOjuxMAuorTo4cmpkbJ38x6DQAA73BJUKOKPlksg6S/b8vSg//crdoGm7vTAgAAXQwFIuCUtae2t79iANPLAADepWrfWj00Lkwmo0Ef78nRvL9tU5m13t1pAQCALoQCESCpqLJWu7JLJUnTWH8IAOCFLkv215IFoxVkMWnL4WLd/KcvlVtmdXdaAACgi6BABEhad7BAdrs0KCFE8aH+7k4HAIBOcVmfaP3z3nGKCbboYF6Frn9ts77Jq3B3WgAAoAugQARI+jz91PQyRg8BALzcoIRQvX/fBPWODlRuWY1ueH2z1h3Md3daAADAzSgQodura2jUf78plCRNGxDr5mwAAOh8ieEBeu9/Jmh0Srgqahp019Kv9Nr6DNntdnenBgAA3IQCEbq9bUeKVVnboKggi4b2CHV3OgAAXBRhAWa98/1xunVMsux26XerDuqBv+9SdV2Du1MDAABuQIEI3d6aU7uXTesfLaPR4OZsAAC4eMwmo569foh+fd1gmYwGrdibq+tf26zs4mp3pwYAAC4yCkTo1ux2u9aeXn+I6WUAgG7q9rE99fd7xikqyKL0kxWa/ceN+uTrXHenBQAALiIKROjWMvIrlV1sldnHqEmpUe5OBwAAtxmdEqGPF07U8KQwldc06L53duqn738ta53N3akBAICLgAIRurW16U27tozvHalAi8nN2QAA4F7xof769w/H63+m9JbBIP19W5bmvPKF0nLL3Z0aAADoZBSI0K2tTTs9vYzt7QEAkCRfH6Men9lfb901VtHBFmXkV+raVzfpjY2HZWtklzMAALwVBSJ0W0WVtdpxrEQS6w8BAHC2SX2itOrByzS1X7TqGhr1zIo03fznL5VZUOnu1AAAQCegQIRua93BAjXapYHxIeoR5u/udAAA6HIigyz62/zR+vV1gxVkMWnHsRLNWrxRf96QyWgiAAC8DAUidFtrDjRNL5s+kNFDAAC0xGAw6PaxPbX64ct1WZ8o1TY06tmV6br+9c06kMPaRAAAeAsKROiWaupt+u+hAknSdNYfAgCgVT3C/PV/d43R724YqmA/k/Zkl+qalzfq6Y/3q6Km3t3pAQCAC0SBCN3SlsNFqq6zKTbEosEJoe5OBwAAj2AwGHTz6CR99vBkzR4ar0a7tGTTUU37wwZ9uPuE7HamnQEA4KkoEKFbWtO8e1msjEaDm7MBAMCzxIX66dXbRur/7hqjS6ICVVBRqwf/sVu3/u8W7TtR5u70AADAeTC5OwHgYrPb7VpzIF+SdCW7lwEAcN4u7xutVQ9dpr9sOKxX1mVoy+FizXnlC90wMlGPzeinuFC/dp8zKytLhYWFnZBtx4uKilJycrK70wAAoENQIEK3sz+nXCfLa+Tv66PxvSPdnQ4AAB7NYvLRwiv66Dsjeuh3qw/q4z05+s+O41qxN1c/uLyX7r28lwItbfuRMysrS/0HDJC1urqTs+4Y/gEBSk9Lo0gEAPAKFIjQ7Xx2aveyy/tGyc/Xx83ZAABwcaSlpXV6Hwv6SROjIrVkd7kOFtXrj2sP6a1Nmbp1cLCmpvjLp5Vp3WlpabJWV+v2x59XbHLvTs/3QuRlZeqd536swsJCCkQAAK9AgQjdzun1h6YzvQwA0A2UFzft2nnHHXdc1H4D+k1U2OT5KgmP12vby/TSJ7tVsu6vqjm6u9VjAyNildhnUOcnCQAAmlEgQreSU2rV/pxyGQzStP5sbw8A8H7WynJJ0ux7f6Z+Q0dd1L5tdimzokHpZT5SzCWKveUZxfo1amiYTSFm5x3P0rZt0Mo3F6umpuai5gkAACgQoZtZe2r00KjkcEUGWdycDQAAF09kQk+3jMrpKWl8vU3bjhRr7/FS5dUYteakUQPiQzSuV4SC/Xyb2+ZlZV70/AAAQBMKROhWPktr2r1s+kCmlwEAcLH4+/poct9oDU0M1aaMQmUWVOlAbrkO5lVoRFKYLk0Jl8XEuoAAALgTBSJ0GxU19foy
s2nbXNYfAgDg4gsPMOuaoQnKLbPqi0OFyimr0fZjJdqXU6YxKRFq1LkXsQYAAJ2HAhG6jY2HClVvsyslMkC9owPdnQ4AAN1WfKi/bhyVqCOFVdqUUaTi6jr991ChLOqlgAGTZXdenggAAHQyCkToNtYc+Hb3MoOBTygBAHAng8GgXtFBSokM1IHccm05XKSqOrOi5/5YO2vqFFlcreSIAHenCQBAt2F0dwLAxdBga9TnB1l/CACArsZoNGhwj1DNm5CinspXY221Ku1mLdt1Qh/vyVFJdZ27UwQAoFugQIRuYWdWqUqr6xXq76tLe4a7Ox0AAHAWXx+jklSkE3/+vnqYKmUwSIcLq/T2lmPaeKhAtQ02d6cIAIBXo0CEbmHNqe3tp/WPkcmHf/YAAHRVjdZypZrLdcfYnuoZGaBGe9MHPW9uPqavT5SpkQWKAADoFPymjG7hzPWHAABA1xcRaNZ3hvfQtcMSFB7gK2u9TZ+n5+vv27J0vKTa3ekBAOB1WKQaXi+zoFKHC6vk62PQ5X2j3J0OAABoh5SoQCVFBOjrE2XacrhIhZV1em/nCfWODtRlfaIV6u/r7hQBAPAKFIjg9U6PHhrXK1LBfvwQCQCAp/ExGjQ8KUz94oK15XCRvj5RpsyCKh0trNaonuEanRLOFHIAAC4Qd1J4vdPrD13J7mUAAHg0f18fTe0Xo9vHJCs5IkA2u13bjhbr7a1ZOlZU5e70AADwaBSI4NWKKmu141iJJOkK1h8CAMArRAZZ9J3hCZo9JF5BFpPKrPX6YHeOVn6dq6raBnenBwCAR2KKGbzauoMFarRLA+ND1CPM393pAACADmIwGJQaE6TkiABtOVyk3dml+ia/UkeLqjWhd6SGJIbKaDC4O00AADwGI4jg1Zp3L2N6GQAAXslsMuryvtH67pgkxYZYVGdr1PpvCvTPr7KVX17j7vQAAPAYFIjgtWrqbfrvoQJJ0vQBMW7OBgAAdKaYYD/dfGmSpvSLltnHqPyKWv3jq2xtOFig2gabu9MDAKDLo0AEr/Xl4SJV19kUG2LR4IRQd6cDAAA6mdFg0LDEMH1vfE/1jQ2SXdLu46V6e0uWjhSyiDUAAOdCgQhe6/T0sisGxMpoZA0CAAC6i0CLSVcPjtd3hico1N9XlbUN+mhPjlbvPylrPaOJAABwhQIRvJLdbv92e3t2LwMAoFvqGRmo28cma2RymAyS0k9W6K0vj+lQfoW7UwMAoMuhQASvtO9EufLKa+Xv66PxvSPdnQ4AAHATXx+jLusTrZsuTVREgFnWeps++fqkVnydq6raBnenBwBAl0GBCF5p9f6TkqTJfaPl5+vj5mwAAIC7xYf669axSRqdEi6DQcrIr9TbW48p/WS57Ha7u9MDAMDtKBDBK606VSCaOTjOzZkAAICuwmQ0akLvKH13dJKigsyqqW/U6v15+nhvriprGE0EAOjeKBDB62TkVyojv1K+PgZN7c/29gAAwFFMsJ++OzpZ43tFymiQjhRW6a2tx7Q/p4zRRACAbosCEbzO6ell43tHKdTf183ZAACArsjHaNCYSyJ025hkxYZYVNfQqDVp+fpwT44qaurdnR4AABcdBSJ4ndMFopmDmF4GAADOLTLIoptHJWlSapR8jAYdK6rW21uyGE0EAOh2KBDBq5wotWrv8TIZDNKVA9neHgAAtM5oNGhUz3DdNiZZcSF+qrOdGk20m9FEAIDugwIRvMqnp0YPXdozXNHBFjdnAwAAPElEoFk3XZr47Wii4qbRRPtOMJoIAOD9KBDBq6za11QguorpZQAA4DwYDU2jiW4fk6z40KbRRGvT8/XB7hyVM5oIAODFTO5OAOgoRZW1+uposSQKRAAA4MKEB5p146hE7c4u1ebMImUVV+udLVm6rE+UBiWEuDs9oF2ysrJUWFjo7jTaJCoqSsnJye5OA+iWKBDBa6xJy1OjXRqUEKKkiAB3pwMAADyc0WDQyORwXRIVqM8O5Cm3
rEZr0/N1KL9SA/3cnR3QNllZWeo/YICs1dXuTqVN/AMClJ6WRpEIcAMKRPAaq/fnSWL3MgAA0LHCA5xHE+UYfBU07CrWJkKXV1hYKGt1tW5//HnFJvd2dzrnlJeVqXee+7EKCwspEAFuQIEIXqGipl5fHGoaNnvVYApEAACgY7kaTRQ5c6Ge/m+xXr2kmtHL6PJik3srsc8gd6cBoAtjkWp4hXUHC1Rna1SvqED1iQlydzoAAMBLnR5NNDSsQY31NdqbV6cZL/5Xb2w8LFsjo4kAAJ6LEUTwCqtPbW9/1eA4GQwGN2cDAAC8mdFgUJ+QRq383Y808eHXdLRKemZFmv6+OUP3jQ7VJWG+7k7RCQv/AgBaQ4EIHq+m3qb16fmS2L0MAABcHOXFBWooydGGX16noGEzFD5lgTJLgvTIqjyVb31fZZv/IXtDrbvTbMbCvwCA1lAggsdbf7BAVXU29Qjz17DEUHenAwAAugFrZbkkafa9T6jf0FGy2qQ9xY06YfVR6PibFD/pRg2PaFC8v/unnbHwLwCgLSgQweOt+DpXkjRrCNPLAADAxRWZ0LN54d8+kjILKrX+YIEqaxu0ucBXvaMDdXmfaIX4d71pZwAAnIkCETxaTb1Na9OatrefNSTezdkAAIDurnd0kJLCA7TtaLF2ZZUos6BKx4qqNeaSCI1IDpPJyB4xAICuiTsUPNr6g/mqPjW9bHhSmLvTAQAAkNlk1KTUKN02Jlk9wvzV0GjX5swivb0lSxn5lbLb3T/tDACAs1EggkdbvpfpZQAAoGuKDLLohpE9NGNgrALNPiqz1mvF17l6f+cJFVR0nQWsAQCQKBDBg1nrbPr81O5ls4cmuDkbAAAAZwaDQQPiQ/S98SkakxIhH6NBx0utendblj47kKfymnp3pwgAgCTWIIIHO3N6GbuXAQCArsxsMmp870gNSgjRpoxCfZNfqQO55Tp4skJDEkM1OiVcAWZ+NAcAuA93IXis5ad2L5s9NJ7pZQAAwCOE+Pvq6iHxGl5m1aaMIp0otWp3dqn255RpZHK4RiSHyWLycXeaAIBuiAIRPJK1zqbP005NL2P3MgAA4GHiQ/11w8geyiqu1ubMIuVX1GrrkWLtyirV0MRQjUgOY0QRAOCi4q4Dj7TuYL6s9TYlhvtrKNPLAACABzIYDOoZGajkiABl5Fdqy5FiFVfVafuxEu3KLtXghBCN7BmuED9fd6cKAOgGvKJAVF1draVLl+qDDz5QWlqaCgoKFBQUpMTERF111VVasGCB+vfv3+X6LC0t1dq1a7Vlyxbt2bNHmZmZys/Pl9VqVWBgoBISEjRq1CjdfPPNmj17tnx8GG582orT08uGML0MAAB4NoPBoD6xwUqNCdLhwip9dbRYeeW12nO8THtPlKl3dJCGJYaqR5g/P/cAADqNxxeI1q9fr3nz5ikrK8shXltbq6KiIu3Zs0cvvviiHn/8cS1atKhDbqod0eeaNWt09dVXq6GhwWUf5eXlKi8vV3p6ut555x1deumlevfdd9WnT58Lzt/TVdc1fDu9bCjTywAAgHcwGAzqHR2kXlGByi6xavvRYmWXWJWRX6mM/EpFBpo1JDFUA+JCZDaxGTEAoGN5dIFo3bp1mj17tqxW6znb1dfX65lnnlFJSYleeeWVLtFnZWVli8UhV7Zv364pU6boq6++UkJC997SfV16QfP0siE9mF4GAAC8i8FgUHJEgJIjAlRQUau9J0qVnluhoqo6rT9YoC8OFap3dJD6xQUrOSJAPkbvGVWUlZWlwsJCd6fRJlFRUUpOTu608zc22iVJRi/6+wXQtXlsgai8vFx33HGHU6GmX79+mjx5srKysvTpp5+qsbGx+bVXX31VM2bM0Ny5c7tcn/Hx8ZowYYKioqJ05MgRrVu3TvX19Q5tcnJy9Pjjj+utt946r/y9xYqvcySxexkAAPB+0cEWXdE/VpNSo5SWW6G9x0tVUl2vg3kVOphXIX9fH/WN
DVKfmGDFh/p5dDEhKytL/QcMkLW62t2ptIl/QIDS09JaLRLZ7XYVV9Upp7RGJ0qtyim1KrfMqqKqOpVW16u0uulreU296hoaVW+zq97WqIZTBSI/X6MCzCb5+/rI3+yjyECzeoT7KzHMv+lreID6xwUrMshyMd42AC/msQWiP/zhD8rJyXGITZ8+XZ988ol8fZsW8luyZInuuusuhzaPPPKI5syZc16Fhc7o85prrtFjjz2myy+/3OH1AwcOaObMmcrOznZo/9577+kvf/mL/P39252/NyivqdeaU9PL5gzt3iOpAABA92Ex+Wh4UpiGJYYqr7xW6SfL9U1epaz1Nu05XqY9x8tkMRnVMzJAl0QGqmdkoPzNnrV+ZWFhoazV1br98ecVm9zb3emcU15Wpt557scqLCxUbEIPnSxrKv6cKLEqp7RGOaVW5ZRZmwtCNfWNrZ+0BTX1jaqpr2t+niFJR5zbJUX4a1himIYnffsw+TAVEUDbeWSByG63a+nSpU7x3/72t82FGklasGCB/vCHP2j//v3NsczMTG3YsEFTpkxxa59JSUn6/PPPNXXqVJf9DRw4UIsXL9b111/vELdarcrIyNCQIUPalb+3WLXvpOoaGtU7OlCDEkLcnQ4AAMBFZTAYFBfqp7hQP13WJ1rZxdU6mFeho4VVqmlo1Dd5lfomr1KSFBVkVkKov/xqjPIJjnJz5m0Xm9xbiX0GuTsNSVKDrVHVdTZV1TWoqtamytoGVdTUK8/fpLg7/6C7PspT6b9WtelcMcEWJYT5q0eYv+JD/RQVbFGYv6/CAswKC/BVqL+vLCajfH2MMp/6arfbVV1nk7Xepuo6m6prG1RQWavjJU3Fp+MlVmUVVeloUbWyi63KLrZq+d6mzVxC/X01rX+MevtZZTB3zw+XAbSPRxaI9u3b57RAdHh4uEaNGuXUdvr06Q7FGklavnx5uwtEHd2nq+PONnnyZJfx1tY/8mYf7j4hSfrO8B5MLwMAAN2aj9GglKhApUQFqtFu18myGh0prNLRoioVVtY1PySTEu9bqu9/nKehe7apX1yw+scFq29ssHpHB8nP17NGGp0Pu92uhka76hoaVdvQeOqrrfl5Tb1NVXU2Vdc1qLr2VEGorul114yyJPRTaU3T636+RvUI828uACU0P/yUGBag2FCLLKbz+z5HtqFNmbVe+06UaXd2qfZkl2rb0WKVVtdr2a6mn52TfvSuNuX7aFRopS6JCvSqdasAdByPLBDt3LnTKdavXz+XbQcMGOAU27Vrl0f0abPZXMZ79uzZ7nN5g7zyGm3OLJIkXTu8h5uzAQAA6DqMBkNzUWJiapSqahuUU9Y03elYXrGKa+wqtkrrDxZo/cECh2OjgiwOa9pEB1kUFuCr8ACzwgObRrcEnloDx89slNnH2K4P6ux2e/O6OvW2puJMne3btXZOP99fUCu/lOHKtRpUk1+pRrtdtsYzHna7GmxNf25obDz1telhszXFGs5o39BoV8OpfmobbDq1pE+7+RgNCjT7KMBsUpCfSSF+JtkqCvXZX3+rN19/SVeMG6HwAF+3fngZ6u+rialRmpjaNFKswdaonVmlWpOWp493HlNupXSyRlrxda4CzD4aGB+iwT1CFerv28qZAXQnHlkgysjIcIrFxsa6bBsTE9Om47tinx9++KFTbMSIES32e1ptba1qa2ubn5eXl7e7767o4z05stulkclhSo4McHc6AAAAXVagxaQ+McHqExOs44Z8vfjQbXpr+TrZQ+J18GSFDp6sUPrJcpXXNKiwslaFlbXak13apnMbDZKvj1Fn1kMM+vbJmfEGm111travvxN7yzPaXCCpILfNx7SHQZLZ1DSFy9L81UcWk1GBFpMCzD4KNJsUaGkqCAWafWQ2ORfEjh/K10eHtqh3uK8iAs2dkuuFMPkYNeaSCI25JEIz46wae+W1mvboq8qymlVdZ9P2YyXafqxEKZEBGnNJhOJDmYIGwEML
RGVlZU6xwMBAl20DApwLCa6O72p95uXl6ec//7lT/OGHH2712GeffVZPP/10u/rzBB+cnl42gtFDAAAA7WGvr9GAKLNGjvx2JLrdbleZtV7HS6zNa9rklFpVXFWn4qo6lVQ3PUqr6lVdb5Pt1BCcRrtU2+LUq9YZDJLZp2kkkq/p9FeDGuvrdCQzQ7GJPeXnHyAfo0E+BoOMp76afAzyMRpkMp7+apSPz5nPT8XObONjkK/Pt8Wg9o5+8hYNxcc1OMymK0ddosOFldp/olzHiqt1tKjpkRjurzEpEUoM9++W3x8ATTyyQFTtYutLk8n1WzlzAenTKisru3SfxcXFmjlzpvLy8hzi3/nOd3TnnXe2evxPf/pTPfLII83Py8vLlZSU1Ob+u6KM/ErtO1EuH6NBs4fEuzsdAAAAj2cwGE4tkGzW4B6hrbavtzXKWm9TTZ2txQKR/YxpXHbZ5etzatHl5oWXmwo3rooQO3fu1KhRV+vWV99XYh/P/tm1q/IxGppHlpVU12nHsRKl5ZafKhKeUHyonyb0jlRiOKP1ge7IIwtErkbo1NfXu2zrKh4UFNRl+zxx4oSuuuoqp0Wux40bp7fffrtN57BYLLJYLG1q6ylOL059eZ8oRQZ513sDAADwBKeLPSF+rFvjDcIDzJo+IFZjUiK0I6tE+3PKlVtWo/d2nlCvqEBNSo1SeBecPgeg83hkgSg01PkTjqqqKpdtXcVdHd8V+jx06JCuvPJKHTt2zCE+duxYrVy5ssUpbd7Obrfrw905kpheBgAAAHSkEH9fTe0XozEpEdp2tFhfnyjT4cIqHSmq0pAeoRp7SYQCzB75ayOAdjK6O4HzkZqa6hQ7ezrWafn5+W063t197tq1S5MmTXIqDk2bNk1r1qxRWFhY25P1MjuzSpVVXK0As4+uHHjuBboBAAAAtF+gxaSp/WJ0x9ieuiQqUHa7tPd4md788pj2ZJeq0X6e28AB8BgeWSAaNWqUU+zgwYMu26alpTnFRo4c2aX63LBhg6ZMmeJUWLrpppu0cuXK85oS501OTy+7alAcn14AAAAAnSgi0Ky5wxJ0/Ygeig62qK6hUeu/KdA/v8pWXnmNu9MD0Ik88rftQYMGqWfPng6jbUpLS7V9+3ZdeumlDm3XrFnjdPzs2bO7TJ8fffSRbrnlFtXUOP5ne//99+uPf/yjjEaPrOF1mHpbo5bvbdrm9NrhCW7OBgAAAPiWqw+Gu5rzzTEpIkDfHZ2kr0+UaXNmkfIravXPr7I1NDFU43tHymLy6eBMAbibRxaIDAaD5s2bp0WLFjnEn3jiCa1YsaJ5F7ElS5bowIEDDm169+6tyZMnO8SmTJmiDRs2OMSOHDmilJSUTutTkt588019//vfV0NDg0N80aJF+sUvftHS2+9WvjhUqOKqOkUFmTUpNcrd6QAAAAAqLy6QJN1xxx1uzqTtzmcnZ6PBoGGJYUqNDtLGQ4U6mFehPcfLlJFfqWn9Y9QrunvPdEDHy8rKUmFhobvTaJOoqCglJye7O40O5ZEFIkl69NFH9cYbbygnJ6c59tlnn2nIkCGaPHmysrOztXr1aqfjXnjhBZfbal7sPj/99FMtWLBA9rPm8vbq1UtFRUV66KGHWszjtttu05gxY87rPXia/+w4Lkm6ZmiCTD7dezQVAADAhejqo126en5nslaWS5Jm3/sz9RvqvBRFV5K2bYNWvrnYacZCewRaTJo5OE4DE0K0Lj1fpdZ6fbw3V/3jgjW5b7T8fBlNhAuXlZWl/gMGyFpd7e5U2sQ/IEDpaWleVSTy2AJRSEiI3n77bc2ePVtWq7U5fvDgwRbXBrr//vs1d+7cLtFnTk6OU3FIkg4fPqzFixefM4/hw4d3iwJRcVWdPj1wUpJ006WJbs4GAADAM3naaJfzGeniLpEJPZXYZ5C70zinvKzMDjtXckSAbh+brC2Hi7Uzq0TpJyuUVVytKxhNhA5QWFgoa3W1bn/8ecUm93Z3
OueUl5Wpd577sQoLCykQdRVTp07VihUrNG/ePGVnZ7fYztfXV//v//0//epXv/LIPrurD3efUL3NrkEJIRqUEOrudAAAADySp4x26YiRLuh8Jh+jJvWJUu+YQH12IE8l1U2jifqdGk3kz2giXKDY5N5dvvDqrTy6QCQ1FWzS09O1ZMkSffDBBzpw4IAKCwsVFBSkxMREzZgxQ3fffbf69+/v0X12N3a7Xf/8qqkAd8voJDdnAwAA4Pm6+miXjhzpgs4XH+qv28Yka8uRYu08VqKDJyuUXVytqf1ilBrDaCLAE3l8gUiSAgICdP/99+v+++8/r+PXr19/0fucP3++5s+ff17Hdgf7c8qVfrJCZpNRc4exexkAAADQ1Zh8jJqUGqXU6CB9lpan4qo6rfg6V31jgzSlb4z8zYwmAjwJq/6iS/rX9qbRQ1cNilNYgNnN2QAAAABoSVyon24dnaRLe4bLYJC+yavUW1uO6VBehbtTA9AOFIjQ5dTU2/TBrhOSpJtZnBoAAADo8kw+Rk1MjdItlyYpMtAsa71Nn+w7qRVf56q6rsHd6QFoAwpE6HJW7z+p8poG9Qjz14TeUe5OBwAAAEAbxYb46btjkjQmJUIGg5SR3zSa6ODJCpe7OAPoOigQocv59/bjkqQbRiXKx2hwczYAAAAA2sNkNGp870h9d3SSooLMqqlv1Kr9J7V8b66qahlNBHRVFIjQpWQXV2tTZqEk6aZRTC8DAAAAPFVMsJ++OzpZ43pFyGiQDhdW6a0tx3Qgt5zRREAXRIEIXcp7O4/Lbpcm9I5UUkSAu9MBAAAAcAF8jAaNvSRSt45JVkywRbUNjfrsQJ4+2pOjipp6d6cH4AwUiNBlNDbam6eX3TI6yc3ZAAAAAOgoUUEW3XJpkib0jpSPwaCjRdV6e0uW9p0oYzQR0EVQIEKX8eXhIp0otSrYz6SrBsW5Ox0AAAAAHchoNGh0SoRuG5usuBA/1dkatTY9X//ecVyFlbXuTg/o9kzuTgA4rV9csH5ydX/ZGu3y8/VxdzoAAAAAOkFEoFk3XZqo3dml2nK4SLllNXp3W5ZSg3xk8PVzd3pAt0WBCF1GVJBFP5zc291pAAAAAOhkRoNBI5PD1ScmSBu+KVBmQZUOVfgo4fuvaXO2VSNG2GUwsKMxcDExxQwAAAAA4BbBfr66ZmiC5g5LUICPXaaQGP3+y1Ld8pct2neizN3pAd0KBSIAAAAAgFtdEhWoK+PrVbrp7zL7SNuOFGvOK1/o//1nj/LLa9ydHtAtUCACAAAAALidySiVffGOXp4Zo2uHJ8hul/61/bim/n69Xvj0oMpr6t2dIuDVKBABAAAAALqM6EAfLf7uCL1/3wQNTwpTVZ1Nf/w8Q5c9t06vrc9QdV2Du1MEvBIFIgAAAABAlzMyOVzL7pug128fqT4xQSqz1ut3qw7q8t+t01+/OCJrnc3dKQJehQIRAAAAAKBLMhgMunpIvFY9dLlevGWYkiMCVFhZp18tP6AJv12rl9Z8o5KqOnenCXgFCkQAAAAAgC7Nx2jQdSMStfbRyfrNdUOUHBGgkup6vbTmkCb89nM9/fF+HS+pdneagEejQAQAAAAA8Ai+PkbdNjZZnz86WX+8dYQGxofIWm/Tkk1Hdfnv1un7b27Xf78pUGOj3d2pAh7H5O4EAAAAAABoD5OPUXOHJWjO0Hj991Ch/vLfTG3KKNKatDytSctTSmSA7hjXUzeMTFR4oNnd6QIegQIRAAAAAMAjGQwGTe4brcl9o5WRX6G3t2TpvR3HdbSoWs+sSNNzq9I1rX+Mrh+ZqKn9YmQ2MYkGaAkFIgAAAACAx0uNCdZTcwfpx1f104e7c/TO1mPan1Ou1fvztHp/niICzZozNF7XDEvQqORwGY0Gd6cMdCkUiAAAAAAAXiPQYtJtY5N129hkpZ8s1/s7T2jZrhMqqKjVm18e05tfHlNsiEVXD47XrCHxGtUzXD4UiwAKRAAAAAAA79Q/
LkRPzArR/7uqn77IKNRHu3P02YE85ZXXaunmo1q6+aiigsya3DdGU/tH67LUaIUG+Lo7bcAtKBABAAAAALyayceoKf1iNKVfjGobbNqUUagVe0/qswMnVVhZp/d2Htd7O4/Lx2jQqORwTe4Xran9YjQgPlgGg+vRRVlZWSosLLzI7+T8REVFKTk52d1poIujQAQAAAAA6DLS0tI6vY8wSbenSjdfEqX0ojrtzK3VztxaZZc3aNvRYm07WqznVx9UhL9RI+IsGhVv0ZAYiwLNTYtc5+bm6sabblKN1drpuXYE/4AApaelUSTCOVEgAgAAAAC4XXlxgSTpjjvucFsOPiEx8u81Sv69LpVfz2Eqlp/WHrFq7RGr7PZG1Z3MVE3WXtVkfa1am3Tdjxbpkn6D3ZZvW+RlZeqd536swsJCCkQ4JwpEAAAAAAC3s1aWS5Jm3/sz9Rs6ys3ZSDa7VFhTr5M1Rp20GlXZYJQlvo8s8X0UOvYG2RttOmSyyWiIVWK4vxLC/OXrY3R32sB5o0AEAAAAAOgyIhN6KrHPIHenIUnqecafK2sbdLykWsdLrMrMKVSN0ayKRh9tP1ai7cdKZDRIsSF+SgjzV0Kon+LD/OXv6+O23IH2okAEAAAAAEArgiwm9Y8LUf+4EIXnbNU/Xv+drnjwBRkjkpVdYlVlbYNyy2qUW1ajHaeOiQg0KyG0qWgUH+qnUH/fFhe9BtyNAhEAAAAAAO1kKy9QnMmq4YPiZLfbVWatV05pjXLKrMoptaqkul7FVXUqrqrTvpym6XMBZh8lhPorIaypaBQVZJGPkYIRugYKRAAAAAAAXACDwaCwALPCAswamBAiSaquOzWi6FTRKK+8RtV1NmUUVCqjoFKSZDIaFBfqp4RQf8WGWhQb7KdAC7+mwz34lwcAAAAAQAcLMJvUOzpIvaODJEkNtkblldc2jzDKLatRbUOjjpdYdbzE2nxcsJ9JsSF+igvxU2yIRTHBfjKbWPwanY8CEQAAAAAAnczkY1SPcH/1CPeXJNntdhVX1SmntEa5ZVblldequLpOFTUNqqipVEZ+0ygjg5rWMooN8VNMsEVRQRZFBZllYQFsdDAKRAAAAAAAXGQGg0GRQRZFBlk0JDFUklTbYFN+ea3yymt0srxGeeW1qqxtUFFVnYqq6nQg99vjg/1MzcWipq8WhQX4ysgi2DhPFIgAAAAAAOgCLCYfJUUEKCkioDlWVdvQXDAqrKxTYWXtqVFGTY8jhVXNbX2MBkUGmhXm73tqTSRf1dUaZPQPkd1ud8dbggehQAQAAAAAQBcVaDGpV3SQep1ay0iSauttzcWiwspaFVTWqqiyTg2NduVX1Cq/ovaMM/gq6Ufv6nsf5Cn1yy+UEhWolMhA9Qj3V3yon+JD/RQb4qdgP9+L/+bQpVAgAgAAAADAg1h8fRzWM5KkRrtdZdZ6FVfVqbS6XqXVdSq11quovFpWm0FV9XbtOV6mPcfLXJ4zyGJS3KmCUVxI09eYED9Fn1r36PT6R/5m1j7yVhSIAAAAAADwcEaDQeEBZoUHmB3ixw/t14sP3qr3Vm+Qf0yyjhRW61hRlXLLanSyrGmB7PKaBlXWNigj/9vFsVsSaPZpLhqd/TUqyOwQ82MhbY9CgQgAAAAAAC9mb6hVzzBfjRwc7/L1qtoGnSw/XTCq0ckyq3LLmhbJLqysVUFF09fahkZV1dlUVVSto0XVrfYbbDF9WzwKNis6yEVhKbipsAT3o0AEAAAAAEA3FmgxqXd0kHqfsc7R2ex2uyprG04Vi+qai0ZnFpDOfK3O1qiK2gZV1Dbo8BkLabeYg69BCd9/XV/kmxTXmK/QMxbaDvXzldHI7mydjQIRAAAAAABeLi0trcPOZZIULyneT5KfpChJ8pEUIClAdrtd1fV2ldY0qrTGptLaRpVaG1VaazsVa2x+ray2UQ2NUlW9Xb6RScqrkfLOWifJx2BQeKCvIgMtighqGokUE2xRoIWSRkfi
uwkAAAAAgJcqLy6QJN1xxx1uzqRlRr8gGQPDZAqK1NS7npB/dKLKqutVZq1XSXXT7mxNu7bVSXnfHhdkMSk2xKKYED/Fn1pY2+RjdN8b8XAUiAAAAAAA8FLWynJJ0ux7f6Z+Q0e5OZuWpW3boJVvLla0yjQ8dXhz3G63q7ymQUWVtSqqqlNRVdMUtuKqOlXWNqiyoEGZBU1T2HwMBsWGWJp2eAtrelAwajsKRAAAAAAAeLnIhJ5K7DPI3Wm0KC8r02XcYDAo1N9Xof6+6hX9bbyuoVEFFbXKq6hRXlmNTpRZVVVrU05ZjXLKavSVSuRjNCgp3F8pkYFKiQpUqL/vRXo3nokCEQAAAAAA8Chmk7FppFC4v6SmkUZl1nodL7XqRIlVx0usqqxt0NHTO659U6CIALNSY4LUJzZIkYFmGQwsfH0mCkQAAAAAAMCjGQyGU7uemTU4IVR2u11FVXU6Wlilo0XVyimzqri6TtuOFmvb0WKFB/iqT0yw+sYGKTLI4u70uwQKRAAAAAAAwKsYDAZFBVkUFWTRpSkRqq236UhRlQ7lVepYcbVKquubi0WxIRYNjA9Rv9hgWXx93J2621AgQpeTlZWlwsJCd6fRqo7cJhIAAAAA0Hksvj7qHxei/nEhqm2w6UhhU7HoaFGV8sprlVdeoP8eKlTv6EAN6RGqHmH+3W4KGgUidClZWVnqP2CArNXV7k6lzSorK92dAgAAAACgjSymb4tF1XUNSj9ZoQM55SqqqtM3eZX6Jq9SkUFmDUsMU/+4YPl2k53QKBChSyksLJS1ulq3P/68YpN7uzudczq9DWNNTY27UwEAAAAAnIcAs0kjk8M1IilM+RW12neiTOknK1RUWafP0/O1KaNQgxJCNDwpTMF+3r0LGgUidEmxyb279BaMUsvbMAIAAAAAPIvBYFBsiJ9iQ/w0MTVKB3LLtfd4mcqs9dqZVard2aXqFxesUcnh7k6101AgAgAAAAAAOMXP10cjk8M1PClMR4uqtCurVMdLrErLrVBaboXi/U0yJ/R3d5odjgIRAAAAAADAWYwGg3pFBalXVJBOltVo+7FiZRZUKddqVPydv9d/j1k1cqS7s+w43WOlJQAAAAAAgPMUF+qna4Ym6Hvjeiol0KaGymKNTrC4O60ORYEIAAAAAACgDcIDzRoVaVPOn78vf1/vKql417sBAAAAAADoZPaGOnen0OEoEAEAAAAAAHRzFIgAAAAAAAC6OQpEAAAAAAAA3RwFIgAAAAAAgG6OAhEAAAAAAEA3R4EIAAAAAACgm6NABAAAAAAA0M1RIAIAAAAAAOjmKBABAAAAAAB0cxSIAAAAAAAAujkKRAAAAAAAAN0cBSIAAAAAAIBujgIRAAAAAABAN0eBCAAAAAAAoJujQAQAAAAAANDNUSACAAAAAADo5igQAQAAAAAAdHMUiAAAAAAAALo5CkQAAAAAAADdHAUiAAAAAACAbo4CEQAAAAAAQDfnFQWi6upqvfbaa5oxY4aSkpLk5+enqKgoDR8+XI8//rjS09O7fJ9r167V3Xffrb59+yokJESBgYHq1auXbr31Vi1btqzD8wcAAAAAADjN5O4ELtT69es1b948ZWVlOcRra2tVVFSkPXv26MUXX9Tjjz+uRYsWyWAwdKk+S0tLNW/ePH300UdOrx05ckRHjhzRP/7xD02aNEl///vflZiYeMH5AwAAAAAAnMmjRxCtW7dOs2bNcirUnK2+vl7PPPOMFi5c2KX6rKio0JVXXumyOHS2L774QlOnTtXJkyfbnTMAAAAAAMC5eGyBqLy8XHfccYesVqtDvF+/frrnnns0c+ZMGY2Ob+/VV19tUzHmYvX5xBNPaPv27Q6xwMBA3XrrrZo/f74iIyMdXsvIyNB999133vkDAAAAAAC44rEFoj/84Q/KyclxiE2fPl1ff/21/vznP2vlypV64403nI575JFHZLfb3d5nZmamXnvtNYdYQECAtm7dqnfffVdL
lizR3r17FR8f79Bm2bJl2rhx43nlDwAAAAAA4IpHFojsdruWLl3qFP/tb38rX1/f5ucLFizQoEGDHNpkZmZqw4YNbu9z6dKlamxsdIjdddddDscmJCTokUcecerzr3/9a7vzBwAAAAAAaIlHFoj27dvntAZQeHi4Ro0a5dR2+vTpTrHly5e7vc9PPvnEqc2VV17ZpnOtWLGi1XwBAAAAAADayiMLRDt37nSK9evXz2XbAQMGOMV27drl1j7r6+u1b9++Np2vf//+TrHCwkJlZ2efM18AAAAAAIC28sht7jMyMpxisbGxLtvGxMS06fiL2WdWVpbq6uradD4/Pz+FhISovLzc6XxJSUku+6+trVVtbW3z87KyMklyOkdXVFlZKUk6fmi/aq3Vbs7m3PKyMiVJJ49+o8zAADdnc26ekqun5CmRa2ch187hKbl6Sp4SuXYWcu0cnpKrp+QpkWtnIdfO4Sm5ekqeklRw/Iikpt9fu/rv2afza9NazHYPtHDhQrskh8dtt93msu2qVauc2oaGhrq1zx07dji9LsleV1fn8nxxcXFObZctW9Zirk8++aTL8/PgwYMHDx48ePDgwYMHDx48ut8jOzu71bqHR44gqq52HlliMrl+K2cuIH3a6VEq7urT1bku5Hxn++lPf+qwuHVjY6OKi4sVGRkpg8HQ4nHwXuXl5UpKSlJ2drZCQkLcnQ7gFbiugI7HdQV0PK4roON50nVlt9tVUVGhhISEVtt6ZIEoIMB5uFl9fb3Ltq7iQUFBbu3T1blOH2c2m9t9vrNZLBZZLBaHWFhYWIvt0X2EhIR0+f/AAE/DdQV0PK4roONxXQEdz1Ouq9DQ0Da188hFql29uaqqKpdtXcXb+s3prD5b6r+z3wMAAAAAAIArHlkgSk1NdYrl5eW5bJufn9+m4y9mn8nJyS5HCrk6n9VqVUVFRZvyAQAAAAAAOB8eWSAaNWqUU+zgwYMu26alpTnFRo4c6dY+fX19NWTIEKc26enpbYpFRUW1uIMZ4IrFYtGTTz7pNPUQwPnjugI6HtcV0PG4roCO563XlUcWiAYNGqSePXs6xEpLS7V9+3antmvWrHGKzZ492+19zpo1q03HdVT+6N4sFoueeuopr/sPDHAnriug43FdAR2P6wroeN56XXlkgchgMGjevHlO8SeeeMJhQeclS5bowIEDDm169+6tyZMnO8SmTJkig8Hg8Dh69Gin9jlv3jwZjY7f/iVLlmj//v3Nz3NycvTiiy869Xn33Xc7xQAAAAAAAM6XwW63292dxPkoLy/XgAEDlJOT4xDv16+fJk+erOzsbK1evVqNjY0Or3/44YeaO3euQ2zKlCnasGGDQ+zIkSNKSUnptD4laeHChXrllVccYoGBgbr22mtlNpv18ccfq6ioyOH16667Tu+//77TuQAAAAAAAM6XxxaIJGndunWaPXu2rFZrm9rff//9TgUZqe0Foo7sU5IqKio0bdo0l9PUXElNTdXGjRsVFxfXpvYAAAAAAABt4ZFTzE6bOnWqVqxY0eqCzb6+vvrZz36ml19+uUv1GRwcrM8++0xz5sxptd+JEydq3bp1FIcAAAAAAECH8+gCkdRUsElPT9crr7yi6dOnKyEhQWazWRERERo6dKgee+wx7d27V88884wMBkOX6zMsLEwfffSRPvvsM82fP1+pqakKCgqSv7+/UlJSdPPNN+u9997Txo0blZiY2CH5w3udvZZWWx41NTXuThtwi5KSEq1evVqLFi3S7NmzFRUV5XR9TJkypc3ns9vtev/993XrrbeqV69eCgwMVEhIiPr27au7775ba9eu7bw3A3QhHXVtuVojsrXHqlWrOv8NAm6Qnp6u119/XQsWLNC4ceMUFxcnf39/mc1mRUdHa+LEiXr88cdd7oDckrVr1+ruu+9W3759FRISosDAQPXq1Uu33nqrli1b1onvBugaOuq6mj9/frvvV3/6058u0rtsH4+eYgbA0fkU
Qa1Wq/z8/DohG6Bru+SSS5w2JDjb5MmTtX79+lbPlZ2drVtvvVWbNm06Z7trr71Wb775pkJDQ9uRKeBZOuracrUEQGtWrlypmTNntusYoKubP3++3nzzzTa1NRqN+tGPfqTnn39eJpPJZZvS0lLNmzdPH3300TnPNWnSJP3973/nQ2p4pY68rtpzrtNef/11/fCHP2zXMReDx48gAgDgfHTU5yM5OTmaMmVKq8UhqWnTgquuukpVVVUd0jfQFfHZI9CxSktL29y2sbFRL730kr7//e+7fL2iokJXXnllq8UhSfriiy80depUnTx5ss39A56iI68rb+K6rAzAKyxYsEAhISHnbNPSp0tAd2IymdSnTx+lpaW1+9h7771Xhw8fdohFRERo7ty5qq2t1UcffeRQENq6dat+8Ytf6IUXXrjgvIGu7kKurbNdeeWVGjhw4DnbuNpgBPAmBoNBI0eO1KBBg2QwGLRjxw7t27fPqd2bb76pO++8U1dccYVD/IknnnDaICcwMFBz586VxWJx2kU5IyND9913H7sow6td6HV1trFjx2rcuHHnbDN06NALyrmz8Jsh4MV++ctf8sMy0II5c+aoZ8+eGjdunEaNGqW8vDxdcskl7TrHunXrtHz5codYfHy8tm/froSEBEnS/v37NWbMGFVXVze3+eMf/6iFCxe2uz/AE3TEteXKbbfdpvnz5194goAHCgkJ0X333acHHnhAPXr0aI7b7Xa9+uqrWrhwodMxb7/9tsMvspmZmXrttdcc2gQEBGjr1q0aNGiQpKZRsZdeeqlyc3Ob2yxbtkwbN27UZZdd1tFvC3CrjriuXJk5c6aeeuqpjk73omCKGQCgW3r55Zf12GOPadKkSfL39z+vc/ztb39zij366KPNxSFJGjRokBYsWODQxmaztXuuOuApOuLaAvCtG264Qd98842effZZh19ipaaRDw888IDmzp3rdNzevXsdni9dulSNjY0Osbvuuqu5OCRJCQkJeuSRR5zO9de//vVC3gLQ5XTUdeVtGEEEeLE1a9aosLBQxcXFCggIUEJCgsaPH68hQ4a4OzXAK3zyySdOsSuvvNIpNn36dL366qsOseXLl3vsp0uAO3z99ddavHixcnNzZTKZFB0drVGjRmnMmDEym83uTg/oNHfeeWerbSZPnuy0rpDVanV43p571tlWrFjRag6AJ+mo68qVw4cP69VXX9WJEyckSZGRkRo+fLjGjx+vgICA80v4IqFABHixH/zgBy7jQ4cO1a9//Wtdc801FzkjwHscO3ZMxcXFTvF+/fo5xQYMGOAU27dvnxoaGlgHDGijltbtiouL02OPPaaHH35YRiOD49E92Ww2p1jPnj2b/1xfX+9yTRVX96z+/fs7xQoLC5Wdna2kpKQLzBTwHK1dVy1566239NZbbznFQ0NDdf/99+uXv/ylLBZLh+TY0biLAt3Q3r17NWfOHC1atMjdqQAeKyMjwykWGhrq8oYfExPjFKutrVV2dnan5AZ0JydPntRjjz2m2bNnq66uzt3pAG7x4YcfOsVmzpzZ/OesrCyX10dsbKxTzM/Pz+UmJ67ue4A3a+26aq+ysjL95je/0YQJE9q1i9rFRIEI6MaefPJJ/fvf/3Z3GoBHKisrc4oFBga6bNvScGJX5wBwflatWqWHH37Y3WkAF92bb76pTZs2OcQiIiI0b9685uct3W/ac9/inoXupC3X1fnauXOnbr/99gs+T2egQAR4EZPJpDlz5uh///d/tW/fPlVWVqq8vFzbt2/X/PnzZTAYnI75yU9+4rRgIYDWnbkr2WktTRfz9fV1Ga+srOzQnABvYjAYNGbMGD333HP68ssvVVJSIqvVqgMHDujXv/61goKCnI7585//rMzMTDdkC7jHqlWrdM899zjEDAaD3njjDYWFhTXHXN2zpPbdt7hnobto63V1pkGDBunJJ5/Uhg0bVFBQoNraWn3zzTd69dVXFRUV5dT+k08+
0fr16zsh+wvDwgeAF8nKylJ8fLxTfNSoUVqyZImGDRvm9Onq4cOHtWvXLo0aNepipQl4BVefrtbX17ts21Lc1S+4AJq8++67Lu9pAwYM0IABAzRjxgyNHz9eDQ0Nza/ZbDZ98MEHevTRRy9mqoBb/Pvf/9Ydd9zhNHXs97//va677jqHWEsjWevr610u8u7qvsU9C91Be66r05599lmX96s+ffqoT58+mj17toYNG+Y0Cu+9997TlClTOiz3jsAIIsCLuPqP6UwLFy5UZGSkU3znzp2dlRLgtUJDQ51iVVVVLtu2FHd1DgBNWrunXXrppS43W+Cehu7gT3/6k7773e86/RL7m9/8xuU29S3db9pz3+KeBW/X3uvqtNbuVz179tSCBQuc4l3xfkWBCOhGfHx81LdvX6d4YWGhG7IBPFtqaqpTrLy8XDU1NU7x/Px8p5jFYmE3GOACudohkHsavN2vf/1r/c///I/DEgEGg0Evv/yyfvrTn7o8Jjk52eVIoby8PKeY1WpVRUWFU9zVfQ/wFudzXbWHp9yvKBAB3UxJSYlTrKUFCgG0rGfPni5H5KWnpzvF0tLSnGJDhgxhi3vgAnFPQ3dit9v18MMP6+c//7lD3Gw26+9//7seeOCBFo/19fXVkCFDnOKu7lmuYlFRUXyoAa90IddVe3jK/YoCEeAlPvzwwxbXOTlt//79OnjwoFO8V69enZUW4NVmzZrlFFuzZk2bYrNnz+6UnABvsH79ehUVFZ2zTW1trVasWOEU554Gb9TQ0KB58+bppZdecogHBwdrxYoVuuWWW1o9B/cswFFHXFfbtm1TVlbWOdvY7Xa9//77TvGueL+iQAR4iSeffFIDBw7U66+/7nKXid27d+vGG2+U3W53iPv7+2vq1KkXK03Aq7iaT/7CCy8oJyen+fn+/fu1dOlShzY+Pj4dsk0q4K0++OADpaam6uc//7mys7OdXs/Ly9Mtt9zi8jV+kYW3sVqtuv766/XWW285xGNiYrR+/XpNnz69TeeZN2+ejEbHX/+WLFmi/fv3Nz/PycnRiy++6HTs3XfffR6ZA11XR11XmzdvVr9+/fSjH/3I5Qfx5eXluvfee7Vt2zan17ri/cpgP/u3RQAeafjw4dqzZ4+kpiGRY8aMUZ8+fWQwGPTNN99o8+bNLrez/9nPfqZnnnnmYqcLuN0rr7yijIyM5ufl5eVasmSJQ5sePXroxhtvdIjddtttGjNmTPPzOXPmaPny5Q5tIiIiNHfuXNXV1enDDz90Wuzz4Ycf1gsvvNBRbwXoUjri2nrooYe0ePFiSU1rQAwaNEjDhg1TUFCQjh49qs2bN7tcI2XSpEnauHFjR78lwK1uv/12vfvuu07xa665Rr179z7nsWePjFi4cKFeeeUVh1hgYKCuvfZamc1mffzxx06j96677jqXox8AT9ZR19VLL73ksEt0amqqRo0apfDwcGVnZ2vr1q0u1xrq1auXDhw4IIvFcv5vohNQIAK8xJkForaaNm2aPvnkky73HxNwMUyZMkUbNmxo93FLlizR/Pnzm5/n5OTosssu0+HDh9t0/NixY7V27douOe8c6AgdcW2dWSBqqx49euiLL75QSkpKu/sGurLzvaYkOY0cr6io0LRp07R9+/Y2HZ+amqqNGzcqLi7uvPoHuqqOuq7OLhC1RXBwsNatW6dRo0adV/+diSlmgJcYOHCgDAZDm9oajUY98MADWr58OcUh4AIlJCRo3bp1mjBhQqtt586dq9WrV1McAlqRmprarvvTFVdcoc2bN1McAloRHByszz77THPmzGm17cSJE7Vu3TqKQ8A5JCcnKygoqM3tR4wYoc2bN3fJ4pAksX0K4CXeffddPfvss1q5cqX++9//Ki0tTVlZWaqoqJDRaFR4eLj69++vyy67TAsWLNAll1zi7pQBr5GcnKwvvvhCy5Yt0z//+U9t27ZNeXl58vHxUVxcnCZNmqQ77rhDV1xxhbtTBTzC
Aw88oDvvvFMrV67U+vXrtXfvXh05ckSlpaVqaGhQaGioUlJSNG7cON12221tKtACaBIWFqaPPvpIa9as0TvvvKMvvvhCJ0+elM1mU2xsrMaMGaNbbrlF1113XZs/fAS6q+uvv15XX321Pv30U61bt067du1SZmamiouLVVdXp5CQECUmJmrMmDG66aabNGPGjC59XTHFDAAAAAAAoJtjihkAAAAAAEA3R4EIAAAAAACgm6NABAAAAAAA0M1RIAIAAAAAAOjmKBABAAAAAAB0cxSIAAAAAAAAujkKRAAAAAAAAN0cBSIAAAAAAIBujgIRAAAAAABAN0eBCAAAAAAAoJujQAQAAAAAANDNUSACAABoo6VLl8pgMDg8nnrqqfM+31NPPeV0vqVLl3ZYvujazv67T0lJcXdKAIBujAIRAABo5qpgcfbDYrEoJiZG48eP16OPPqq9e/e6O210sPXr17v8uwcAAN6LAhEAAGiXuro6FRQUaMuWLXrhhRc0bNgwLViwQFVVVe5OrV1cFUHmz5/v7rTgoVJSUiioAQA8msndCQAAAM+3dOlSHT9+XKtWrZKPj4+70+k0AwcO1IMPPugQGzdunJuyAQAA6DgUiAAAwDn16NFDN954oyTJZrPpyJEjWrNmjWprax3arVmzRn/729/0gx/8wB1pXhRjxozRmDFj3J0GAABAh6NABAAAzik1NVUvvfSSQyw9PV0TJ05UcXGxQ/yNN97w6gIRAACAt2INIgAA0G79+/fXo48+6hTfvn276uvrXR5TWFio5557TjNmzFBiYqL8/f0VHBysvn376q677tK6devO2ee5dvz6+OOPdc011yguLk4Wi0UpKSn64Q9/qKNHjzqdZ/78+TIYDJo6darTa2+++eY51yVq7y5maWlpuuuuu5SUlCSLxaLExETdfvvt+uqrr875Xs/l008/1Q9+8AMNHjxYERERMpvNiouL05QpU/Tcc8+ptLT0vM/dmbZu3aqFCxdqxIgRio6OltlsVnR0tMaNG6df/vKXysnJOefxLa3xY7Va9fvf/16jR49WaGioAgMDNWzYMD377LOyWq2t5vXPf/5T06ZNU0REhAIDAzVgwAA9/vjjKigoOGe/kuM6VseOHXM69/ku9F1RUaFFixZp2LBhCg4OVnBwsMaOHavXX39dNputTecAAKDd7AAAAKc8+eSTdkkOj8mTJ7ts+/HHHzu1lWTPzc11avviiy/a/f39XbY/83HNNdfYS0pK2pzbG2+8YZ8/f36L5wsMDLSvWLHC4Tzz5s1rNY8zH/PmzWs+dsmSJU6vP/nkky7zfeedd+wWi8XlOX18fOzPP/+8y/e0ZMkSl+c7cuSIffz48a3mGxYWZv/3v//t8hxttW7dOpfnPh8FBQX2a665ptW8/f397S+//HKL5+nZs6fTMenp6fbevXu3eM7x48fbq6qqXJ6vvr7efvPNN7d4bExMjH3Lli0u+23t+3Sux5nOfq1nz5723bt325OSklo8/rrrrrPbbLbz+rsAAOBcGEEEAADOS0lJicu4n5+fw/OHHnpIDz/8cJtGcyxfvlyTJ09u845oixYtah5F5EpVVZWuv/567dmzp03n6ygbNmzQ9773Pad1mk6z2Wz68Y9/rH//+99tOl9GRoZGjx6tL7/8stW2paWluvnmm/V///d/7cq5MxQVFWn8+PFavnx5q22tVqsWLlyoX/3qV20+/+WXX67MzMwWX//yyy/1zDPPuHxt4cKF+te//tXisfn5+Zo1a1aL/847Q0lJia644gplZ2e32GbZsmV64403LlpOAIDugzWIAADAeXn//fedYhEREQoLC2t+/t5772nx4sVO7caMGaOhQ4eqoqJCn332mcNaRnv37tVDDz2k//3f/201h6ysLEnSpEmTNHjwYB07dkyrV69WY2Njc5va2lrdc8892rp1qyRpxowZCgsL0/Hjx/Xee+85nG/AgAGaMWOGU67t0djYqB/84AdO
U4FMJpNmzZqlhIQEffXVV9qxY4cOHDjQpvPdcMMNKiwsdIiHhYVp+vTpCg8P1759+xyKR3a7XT/84Q81YcIEpaamtiv/jjR//nxlZGQ4xPz9/TVjxgzFxcUpIyNDn3/+uex2e/PrTz75pKZMmaLLLrus1fPn5+fLYrFo9uzZioyM1Icffqj8/HyHNn/+85/19NNPy9fXtzm2efNm/elPf3I6X2xsrGbNmqWGhgatWLHCaY2tsyUmJjbvave3v/1NFRUVDq+fveNda8rLyyVJwcHBuuaaa2SxWPT+++83x0975ZVXdM8997Tr3AAAtMrdQ5gAAEDX0doUM5vNZj906JD9gQcecDn95fbbb3c434ABAxxeN5lM9mXLljm0KS4utg8dOtRpClZmZmaruUmyv/LKKw7tPvnkE7vRaHRqt2nTJod2rqYHnTmdzJW2TDFzNfXObDbbN27c6NDupz/9qcv3c/YUs3/+859ObaZOnWovLy93aPfXv/7Vqd2CBQvO+X5a0hFTzLZs2eJ0/KBBg+wnT550aLd69Wq7yWRyen9nczXVKygoyL5jx47mNkeOHLFHRkY6tdu1a5fDuW666SanNsOGDbOXlpY2tzl58qS9V69ebf4+nGsqWktcnTshIcF+5MiR5jY7duyw+/r6OrUrLi5u9fwAALQHU8wAAMA5bdiwoXmBXR8fH/Xp00evvPKKUzuTyaQnnnii+fmBAweUlpbm0Ob666/Xd77zHYdYeHi4Hn/8cYeYzWbTBx980Gpuo0aN0v333+8Qu/rqq3XDDTc4tV25cmWr5+sIrvqZP3++Jk2a5BBbtGiREhISWj3ff/7zH6fYq6++quDgYIfYXXfdpT59+jjEli1b5jCa6mI6e3SWJD3//POKjY11iM2YMcNpwfD169erqKio1T7uvfdejRw5svl5SkqKZs6c6dTuyJEjzX9ubGzU6tWrndo888wzCg0NbX4eGxurX/ziF63m0NF++tOfKiUlpfn5yJEjXY5ic7UAOwAAF4ICEQAAuGAmk0lLly7VwIEDm2Onp3Sd6V//+pfLnZ1uv/12p7ZtWW/n6quvdhl3VSTYvXt3q+frCK76ueqqq5xiJpNJV1xxRavnc/V9HDhwoMvv46FDhxzalZaWOhXpLhZXec+aNctl3p999plDO7vdri1btrTax2233eYUi4+Pd4qdOfXr8OHDTlO2zGazZs2a5XTcjTfe2GoOHe183hMAAB2BAhEAALggU6ZM0ebNm52KPKe3CT9fJ0+ebLVNUlKSy3hiYqJT7Ow1fDqLq5EvrvI5V/xMF+P72BkuRt49e/Z0ip29SLokh1FUrv5+evToIaPR+cfioKAghzW1OltQUJAiIiKc4q29JwAAOgKLVAMAgHPq0aOHw0gKX19fhYWFqXfv3ho/frzLX9I7QmVlZaect7vx1O9jW/KOjIx0ivn4+LS7L39//xZfMxgM7T7f+XL1fqTze08AALQXBSIAAHBOqampeumll9p9XExMjFNs3LhxGjt2bJuO79GjR6ttWtoO/Pjx406xqKioNvV7oVz9kn/8+HGX68i4yvNsMTExOnbsWPNzo9GoBx54oM2Fi169erWpXUeLiYlxmt521113Oa2d1JLhw4d3Qlau/x3k5OS4bFtVVXVRt7kHAMCdKBABAIBOMXr0aKdYUFBQm4pNdrvdYevzlqxatUq/+tWvnOKuFiE+u+DgalTG2VvTn4/hw4dr8+bNDrFPP/1U119/vUOsoaFBn3/+eavnGz16tEOBqLGxUTfffLMmTpzY6rE2m81to09Gjx6tDRs2OMSmTZvmcr2ps3Vm3pdccolCQkIc1iEqLS3VV1995fRv1tVC2y1p6d8To38AAJ6CNYgAAECnGDRokPr16+cQW7NmjZ5++mnV1dW5PObQoUN6/vnn1b9/f2VlZbXax/bt2/Xaa685xD799FOXO3+dvaB1UFCQU5uOWNDZ1cLZS5cudSoaPf300zpx4kSr
5zu7sCRJ3/ve95Senu6yvdVq1YoVK3Trrbfqvvvua2PWHc9V3g8++GCLi4/X19dr3bp1uueee5x2uutIRqPR5SLmDz/8sKqrq5uf5+fna9GiRW0+r6t/TwcOHDi/JAEAcANGEAEAgE6zaNEi3XLLLQ6xp556Sq+//romTJiguLg41dXVKTc3V3v37m3TlKuz3X///frHP/6hwYMH69ixY1q1apXTAr6XXnqpJkyY4BDr3bu3DAaDw0ilHTt2aOLEiRo2bJjMZrMk6dFHH21xMWxXrr76aqWmpiojI6M5VltbqylTpmjWrFlKSEjQV199pe3bt7fpfLfccot+85vfaN++fc2xw4cPa+DAgZowYYJSU1MVGBiokpISHTp0SPv27VNNTY0kad68eW3Ouy0eeuihVtv88pe/VEREhMaPH6+rr75aK1eubH6tqKhIEyZM0KhRo9S/f3+FhYWprKxMhw8f1t69e5vXHZo8eXKH5n22Bx98UP/6178cYps2bVLfvn01c+ZM1dfXa/ny5SouLm7zOfv06aO9e/c6xK666irNnDlTISEhkqTLLrtMN9xww4W/AQAAOgEFIgAA0Gluvvlmbdq0SX/84x8d4nl5eVq2bNkFn3/gwIE6cOCANm7cqI0bN7psYzab9Ze//MUpHhISokmTJjkdt3nzZofRPnfccUe7CkQ+Pj76y1/+ounTpzsUqurr6/Xhhx86tE1KSmpxHaXTjEaj3n//fY0fP95hBy673a5NmzZp06ZNbc7tQi1evLjVNg899FDzTlxvvvmmxo8fr8zMTIc2O3bs0I4dOzolx7aYMGGCfvjDH+pPf/qTQ/zEiRP661//2vw8Pj5eNTU1bVqHaNasWU5T0nJzc7VkyZLm5w0NDRSIAABdFlPMAABAp1q8eLFeeuklBQYGtvmYcePGNY+6OJcf//jHWrhwYYuvBwQE6D//+Y9GjBjh8vXnnnvO5RbiF2rq1KlaunRp8ygkV37yk5/orrvuatP5+vTpo+3bt2vSpEltziE8PLzNC4J3lujoaG3ZskXXXnttm48JCAjQZZdd1olZNXn55Zd10003tfh6jx49tGrVKqd/ty39nd5xxx0u190CAMBTUCACAACd7sEHH1R2drZefPFFXXPNNUpOTlZgYKBMJpPCwsI0ePBg3XzzzXr55ZeVkZGhL7/8snkUSmv++Mc/auXKlZozZ45iY2NlNpuVnJyse+65R/v27dOcOXNaPHb8+PHatm2b7rzzTqWkpMhisXTUW9add96pXbt2ad68eerRo4fMZrPi4uJ03XXX6fPPP9ezzz7brvOlpKRo48aN2rBhg+677z6NGDFCkZGRMplM8vf3V48ePTRlyhQ98sgj+uSTT5Sbm6v/+Z//6bD3c76ioqL0wQcfaNeuXXrkkUc0duxYxcTEyGw2y8/PT3FxcZo4caLuu+8+vffee8rLy3O58HhHM5lM+te//qV//OMfmjp1qsLCwhQQEKABAwboJz/5ifbu3asBAwaooKDA4bjo6GiX5zObzVq3bp0WLVqkkSNHKjg4uM07zQEA0BUY7G3ZIgQAAMDNnnrqKT399NMOsSVLlmj+/PnuSQhe76OPPnIa/TRr1iytWLHCTRkBANB5GEEEAACAbmf16tV66aWXVFhY6PL1r776yuUIrLlz53Z2agAAuAWLVAMAAKDbyc3N1cMPP6zHHntMI0aM0IABAxQaGqrKykrt3btXO3fudDomJSWFEWsAAK9FgQgAAADdls1m0/bt27V9+/ZztgsODtb777/foetUAQDQlTDFDAAAADiHMWPGaOvWrS3uhgcAgDdgBBEAAAC6nVtvvVXh4eFatWqVdu3apby8POXn58tmsyk0NFQpKSkaM2aMbrzxRk2ePNnd6QIA0OnYxQwAAAAAAKCbY4oZAAAAAABAN0eBCAAAAAAAoJujQAQAAAAAANDNUSACAAAAAADo5igQAQAAAAAAdHMUiAAAAAAAALo5CkQA
AAAAAADdHAUiAAAAAACAbu7/A+/onqQoGJEAAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(12,8))\n", + "\n", + "plt.rcParams[\"font.weight\"] = \"bold\"\n", + "plt.rcParams[\"axes.labelweight\"] = \"bold\"\n", + "plt.rcParams[\"font.size\"] = 20\n", + "\n", + "sns.histplot(data=dic,x='pep_len',kde=True,binwidth=1, stat='density', common_norm=False,)\n", + "\n", + "plt.xlim(2,26)\n", + "\n", + "ax = plt.gca()\n", + "\n", + "# Set font size and weight for labels and ticks\n", + "ax.set_xlabel(\"Peptide Length\", fontsize=20, weight='bold')\n", + "ax.set_ylabel(\"Fraction of Peptides\", fontsize=20, weight='bold')\n", + "ax.legend(fontsize=18)\n", + "\n", + "plt.tight_layout() # Add this line\n", + "\n", + "plt.savefig('/datapool/data2/home/jiahan/ResProj/PepDiff/frame-flow/Data/Models_new/Results/plot/data1.png',dpi=1200,bbox_inches='tight')" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAABIgAAAL4CAYAAAAQ3sw4AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAADPiElEQVR4nOzdeXxU1f3/8fedJQkhGzsCYUc2lU0FFwgoIojggrtYBKutpXaxrQu2dfnWurSiuNRaF9BSa/sTRVQQRAE3RBBQIBAE2QOBAEnINpPl/v5IZ8wwk2SSO8nMZF7Px2Mezpw5954zkwk4b87nXMM0TVMAAAAAAACIWbZwTwAAAAAAAADhRUAEAAAAAAAQ4wiIAAAAAAAAYhwBEQAAAAAAQIwjIAIAAAAAAIhxBEQAAAAAAAAxjoAIAAAAAAAgxhEQAQAAAAAAxDhHuCeA6FNZWans7GwlJyfLMIxwTwcAAAAAgIhnmqZOnDihTp06yWaLvPU6BESot+zsbKWnp4d7GgAAAAAARJ19+/apS5cu4Z6GHwIi1FtycrKkqg91SkpKmGcDAAAAAEDkKygoUHp6uvc7daQhIEK9ecrKUlJSCIgAAAAAAKiHSN2qJfKK3gAAAAAAANCkCIgAAAAAAABiHAERAAAAAABAjCMgAgAAAAAAiHEERAAAAAAAADGOgAgAAAAAACDGERABAAAAAADEOAIiAAAAAACAGEdABAAAAAAAEOMIiAAAAAAAAGIcAREAAAAAAECMIyACAAAAAACIcQREAAAAAAAAMY6ACAAAAAAAIMYREAEAAAAAAMQ4AiIAAAAAAIAYR0AEAAAAAAAQ4wiIAAAAAAAAYhwBEQAAAAAAQIwjIAIAAAAAAIhxBEQAAAAAAAAxjoAIAAAAAAAgxhEQAQAAAAAAxDgCIgAAAAAAgBhHQAQAAAAAABDjCIgAAAAAAABiHAERAAAAAABAjCMgAgAAAAAAiHEERAAARAjTNOVyuWSaZrinAgAA
gBhDQAQAQIRwu916fNEGud3ucE8FAAAAMYaACACACOJwxoV7CgAAAIhBBEQAAAAAAAAxjoAIAAAAAAAgxhEQAQAAAAAAxDgCIgAAAAAAgBhHQAQAAAAAABDjCIgAAAAAAABiHAERAAAAAABAjCMgAgAAAAAAiHEERAAAAAAAADGOgAgAAAAAACDGERABAAAAAADEOAIiAAAAAACAGEdABAAAAAAAEOMIiAAAAAAAAGIcAREAAAAAAECMIyACAAAAAACIcc0iICouLtbf/vY3jRs3Tunp6UpISFDbtm01ePBg3X333dq2bVtEjpmXl6cFCxbod7/7ncaNG6devXopOTlZDodDqamp6t+/v6ZOnapFixapoqKi1nOtXLlShmHU6zZixIhQvR0AAAAAACCKOcI9AatWrlypadOmae/evT7tLpdLR48e1TfffKMnn3xSd999tx566CEZhhERYy5fvlwTJkxQeXl5wDEKCgpUUFCgbdu26V//+pfOPPNMvf766+rTp4/l+QMAAAAAAFQX1SuIVqxYoUsuucQvqDlZWVmZ/vSnP+mOO+6ImDELCwtrDIcCWbdunUaPHq3s7Ox6zRcAAAAAAKAuUbuCqKCgQFOnTlVJSYlPe9++fZWRkaG9e/dq2bJlqqys9D733HPPady4cZo8eXLEjXnKKafo3HPPVdu2bbVr1y6tWLFCZWVlPn2ys7N1991365///Gedc01OTtaMGTNq7dO9e/c6zwMAAAAAAJq/qA2InnjiCb/VNGPHjtXixYvldDolSXPnzvULSe68805NmjSpQaVmjTHmpZdeqt/+9rcaNWqUz/OZmZkaP3689u3b59N/wYIF+sc//qEWLVrUOtfWrVvrqaeeqs/LAwAAAAAAMSoqS8xM09S8efP82h999FFvUCNJ06dP18CBA3367Ny5U6tWrQr7mOnp6fr444/17rvvKiMjwy88GjBggObMmeM3XklJiXbs2FHv+QMAAAAAANQkKlcQbd682W8PoFatWmnYsGF+fceOHastW7b4tL333nsaPXp0WMcMdNzJMjIyArafXOIWiMvl0htvvKGsrCwVFRUpNTVVPXv21MiRI9WlS5c6jwcAAAAAALEjKgOi9evX+7X17ds3YN/+/fv7tW3YsCEqxqzp0vbdunWr89hDhw7p+uuv92s3DEMTJkzQX//614DzBAAAAAAAsScqS8wClVh16NAhYN/27dsHdXwkjvnOO+/4tQ0ZMqTGcYNhmqYWL16sM888U0uWLGnweQAAAAAAQPMRlQFRfn6+X1vLli0D9k1MTAzq+EgbMycnR7///e/92n/961/X6zw1KS4u1jXXXKNdu3bV2dflcqmgoMDnBgAAAAAAmo+oDIiKi4v92hyOwNVy1TeQ9igsLIzoMY8dO6bx48crJyfHp/3yyy/XTTfdVONxqampmjFjht58803t2rVLpaWlOnTokD788EOdf/75Aef04IMP1jmfRx55RKmpqd5benp60K8FAAAAAABEvqjcgyjQCp2ysrKAfQO1JyUlReyYBw4c0MUXX+y3yfWIESM0f/78Go8bOnSosrOz/ebZoUMHdejQQRdeeKGuvPJKLVy40Of5d955RxUVFbLb7TWe+95779Wdd97pfVxQUEBIBAAAAABAMxKVK4hSU1P92oqKigL2DdQe6PhIGPO7777Teeed5xcODR8+XEuWLKmxpE2SUlJSAoZYHoZh6IEHHvBrz8vL0/fff1/rvOLj45WSkuJzAwAAAAAAzUdUBkS9e/f2azu5HMvj8OHDQR0f7jE3bNig888/X3v27PFpv+CCC7R8+XKlpaUFP9ka1HTVstzcXMvnBgAAAAAA0SsqA6Jhw4b5tWVlZQXsu3XrVr+2oUOHRtSYq1at0ujRo/2CpauvvlpLlixpUElcIMePHw/YXtvKJAAAAAAA0PxFZUA0cOBAdevWzactLy9P69at8+u7fPlyv7aJEydGzJiLFi3S+PHj/a4M
NnPmTL3xxhuKi4urc24lJSV6//336+y3YMECvza73e73ugAAAAAAQGyJyoDIMAxNmzbNr33WrFk+G0TPnTtXmZmZPn169eqljIwMn7bRo0fLMAyf2+7duxt1TEl69dVXNWXKFJWWlvq0P/TQQ3r22WdlswX343G5XLr00ks1ZswYLVq0SBUVFX593nzzTd19991+7eedd16D9mQCAAAAAADNh2GaphnuSTREQUGB+vfvr+zsbJ/2vn37KiMjQ/v27dPSpUtVWVnp8/w777yjyZMn+7SNHj1aq1at8mnbtWuXunfv3mhjLlu2TOPHj9fJb3/Pnj01adKkWl/7DTfcoLPPPtv7OC8vT61atfI+Tk1N1bnnnqv09HSdOHFCmzZt0ubNmwOe68MPP9TYsWNrHe9kBQUFSk1NVX5+PhtWA0AIuVwuzV6yRXdOGKj4+PhwTwcAAAAhFOnfpaPyMvdS1VW75s+fr4kTJ6qkpMTbnpWVVePeQDNnzvQLasI1ZnZ2tl84JEnff/+95syZU+s8Bg8e7BMQnSw/P19Lliyp9RxS1eqn+oZDAAAAAACg+YnKEjOPMWPG6P3331d6enqt/ZxOp+677z4988wzUTlmXeLi4tSnT5+g+yclJen555/Xww8/3IizAgAAAAAA0SJqVxB5jBkzRtu2bdPcuXO1cOFCZWZmKjc3V0lJSerSpYvGjRunW265Rf369YvqMWuTmJio7du36+uvv9ayZcu0evVqbd++XQcPHlRRUZESEhLUtm1bDRo0SGPHjtVNN92ktLS0JpkbAAAAAACIfFG7BxHCJ9LrJgEgWrEHEQAAQPMV6d+lo7rEDAAAAAAAANYREAEAAAAAAMQ4AiIAAAAAAIAYR0AEAAAAAAAQ4wiIAAAAAAAAYhwBEQAAAAAAQIwjIAIAAAAAAIhxBEQAAAAAAAAxjoAIAAAAAAAgxhEQAQAAAAAAxDgCIgAAAAAAgBhHQAQAAAAAABDjCIgAAAAAAABiHAERAAAAAABAjCMgAgAAAAAAiHEERAAAAAAAADGOgAgAAAAAACDGERABAAAAAADEOAIiAAAAAACAGEdABAAAAAAAEOMIiAAAAAAAAGIcAREAAAAAAECMIyACAAAAAACIcQREAAAAAAAAMY6ACAAAAAAAIMYREAEAAAAAAMQ4AiIAAAAAAIAYR0AEAAAAAAAQ4wiIAAAAAAAAYhwBEQAAAAAAQIwjIAIAAAAAAIhxBEQAAAAAAAAxjoAIAAAAAAAgxhEQAQAAAAAAxDgCIgAAAAAAgBhHQAQAAAAAABDjCIgAAAAAAABiHAERAAAAAABAjCMgAgAAAAAAiHEERAAAAAAAADGOgAgAAAAAACDGERABAAAAAADEOAIiAAAAAACAGEdABAAAAAAAEOMIiAAAAAAAAGIcAREAAAAAAECMIyACAAAAAACIcQREAAAAAAAAMY6ACAAAAAAAIMYREAEAAAAAAMQ4AiIAAAAAAIAYR0AEAAAAAAAQ4wiIAAAAAAAAYhwBEQAAAAAAQIwjIAIAAAAAAIhxBEQAAAAAAAAxjoAIAAAAAAAgxhEQAQAAAAAAxDgCIgAAAAAAgBhHQAQAAAAAABDjCIgAAAAAAABiHAERAAAAAABAjCMgAgAAAAAAiHEERAAAAAAAADGOgAgAAAAAACDGERABAAAAAADEOAIiAAAAAACAGEdABAAAAAAAEOMIiAAAAAAAAGIcAREAAAAAAECMIyACAAAAAACIcQREAAAAAAAAMY6ACAAAAAAAIMYREAEAAAAAAMQ4AiIAAAAAAIAYR0AEAAAAAAAQ4wiIAAAAAAAAYhwBEQAAAAAAQIwjIAIAAAAAAIhxBEQAAAAAAAAxjoAIAAAAAAAgxhEQAQAAAAAAxDgCIgAAAAAAgBhHQAQAAAAAABDjCIgAAAAAAABiHAERAAAAAABAjCMgAgAAAAAAiHEERAAAAAAAADGOgAgAAAAAACDGERABAAAAAADEOAIiAAAA
AACAGEdABAAAAAAAEOMIiAAAAAAAAGIcAREAAAAAAECMIyACAAAAAACIcQREAAAAAAAAMY6ACAAAAAAAIMYREAEAAAAAAMQ4AiIAAAAAAIAYR0AEAAAAAAAQ45pFQFRcXKy//e1vGjdunNLT05WQkKC2bdtq8ODBuvvuu7Vt27aIHDMvL08LFizQ7373O40bN069evVScnKyHA6HUlNT1b9/f02dOlWLFi1SRUVF0HPbt2+fHnzwQQ0fPlwdOnRQfHy8OnXqpFGjRukvf/mLjh07ZuWlAwAAAACAZsYwTdMM9ySsWLlypaZNm6a9e/fW2MfpdOruu+/WQw89JMMwImLM5cuXa8KECSovLw9qzDPPPFOvv/66+vTpU2u/Z555Rvfee6+Kiopq7NO2bVv9/e9/15QpU4Ia+2QFBQVKTU1Vfn6+UlJSGnQOAIA/l8ul2Uu26M4JAxUfHx/u6QAAACCEIv27dFSvIFqxYoUuueSSWoMaSSorK9Of/vQn3XHHHREzZmFhYdDhkCStW7dOo0ePVnZ2do19Hn30Uf3iF7+oNRySpNzcXF177bX6z3/+E/T4AAAAAACg+YragKigoEBTp05VSUmJT3vfvn112223afz48bLZfF/ec889p0WLFkXkmKeccoqmTJmin/zkJxo3bpycTqdfn+zsbN19990Bj1+7dq3uu+8+v/ZRo0bpJz/5iYYOHerTXlFRoVtvvVX79++vc24AAAAAAKB5i9qA6IknnvBbTTN27Fht2rRJL7zwgpYsWaKXXnrJ77g777xTDa2qa4wxL730Uq1cuVIHDhzQm2++qb///e9aunSpNm7cqPT0dL/+CxYs8AuoJOm3v/2tKisrfdruv/9+rVq1Sn//+9+1bt06TZs2zef5EydO6P7776/zdQMAAAAAgOYtKvcgMk1T3bt39yvzWrdunYYNG+bTdtppp2nLli0+bStWrNDo0aPDOubXX3+tgoICjRkzpsYx3377bV155ZV+7d9++61OP/107+OdO3eqd+/ePn1at26tQ4cO+axEysnJUadOnXyCpJYtW+rQoUNKSkqqcR4ni/S6SQCIRqZp6sSJE3ru4+9054TT2IMIAACgmYn079JRuYJo8+bNfkFNq1at/IIaqWqFz8nee++9sI85bNiwWsMhScrIyAjYfvIKosWLFwc89uQytQ4dOvgES5JUVFSklStX1joPAEDjc7vdeuK9jfW6aiUAAAAQKlEZEK1fv96vrW/fvgH79u/f369tw4YNUTFmTV8SunXrFva5AQBCz+GMC/cUAAAAEKOiMiDasWOHX1uHDh0C9m3fvn1Qx0fimO+8845f25AhQ/zGDcfcAAAAAABA8+EI9wQaIj8/36+tZcuWAfsmJiYGdXykjZmTk6Pf//73fu2//vWvm3xuLpdLLpfL+7igoKDW/gAAAAAAILpE5Qqi4uJivzaHI3DWFehy8YWFhRE95rFjxzR+/Hjl5OT4tF9++eW66aabmnxujzzyiFJTU723QFdXAwAAAAAA0SsqA6JAq2DKysoC9g3UXp8rdjX1mAcOHNCoUaO0ceNGn/YRI0Zo/vz5YZnbvffeq/z8fO9t3759tfYHAAAAAADRJSpLzFJTU/3aioqKAvYN1B7o+EgY87vvvtNFF12kPXv2+LQPHz5cS5YsqbFsrLHnFh8fz+WWAQAAAABoxqJyBVHv3r392k4ux/I4fPhwUMeHe8wNGzbo/PPP9wuHLrjgAi1fvlxpaWlhmxsAAAAAAGjeojIgGjZsmF9bVlZWwL5bt271axs6dGhEjblq1SqNHj3aL7y5+uqrtWTJkjpLwALNbdu2bSGZGwAAAAAAaP6iMiAaOHCgunXr5tOWl5endevW+fVdvny5X9vEiRMjZsxFixZp/PjxflcGmzlzpt544w3FxcXVObcJEyb4tX3yySd++w3l5ORo8+bNPm0tW7ZURkZGnWMAAAAAAIDmKyoDIsMwNG3aNL/2WbNm+YQi
c+fOVWZmpk+fXr16+QUio0ePlmEYPrfdu3c36piS9Oqrr2rKlCkqLS31aX/ooYf07LPPymYL7sfTq1cvjRo1yqft+PHjevjhh72PTdPUPffco8rKSp9+1157bYM27QYAAAAAAM2HYZqm2ZgDlJeXKzMzUy6XS7169VLr1q1Dct6CggL1799f2dnZPu19+/ZVRkaG9u3bp6VLl/oFIu+8844mT57s0zZ69GitWrXKp23Xrl3q3r17o425bNkyjR8/Xie//T179tSkSZNqfe033HCDzj77bJ+2r776Suecc47f2KNGjVL//v21du1arV+/3ue55ORkZWZmqkuXLrWOd7KCggKlpqYqPz9fKSkp9ToWABCYy+XS44s2yLDZ9LtLB3FxAAAAgGYm0r9LW7qKWVlZmdauXet9nJaWpgEDBngfP/HEE3rooYdUWFgoSbLZbLrmmmv0/PPPW34zUlJSNH/+fE2cOFElJSXe9qysrBr3Bpo5c6ZfUBOuMbOzs/3CIUn6/vvvNWfOnFrnMXjwYL+A6Oyzz9bDDz+se++916f9k08+0SeffOJ3DrvdrhdffLHe4RAAAAAAAGh+LJWYffrppxo5cqT39q9//cv73D//+U/97ne/04kTJ2SapkzTVEVFhd544w1dffXVlicuSWPGjNH777+v9PT0Wvs5nU7dd999euaZZ6JyzGDdc889mjNnjhITE2vt16ZNG73xxhu69tprm2hmAAAAAAAgkllaQfTpp596V8EYhqEpU6Z4n3vssce87dWZpqnly5dr2bJlGjdunJXhJVUFNtu2bdPcuXO1cOFCZWZmKjc3V0lJSerSpYvGjRunW265Rf369bM8VjjHDNYvfvELXXHFFXrppZe0ePFi7d69W/n5+WrdurX69OmjSZMm6cc//nHISv0AAAAAAED0s7QH0RVXXKF33nlHUlV52bFjxyRVlUn17t3bGw5VD5FM05RhGJo+fbpeeuklq/NHGER63SQARCP2IAIAAGjeIv27tKUSs++//15SVfBzxhlneNtXr14tSd4w6L777tOPf/xj72PTNPX1119bGRoAAAAAAAAhYqnELDc317tKqGfPnt72rVu3SqoKjjIyMvR///d/kqR169bpm2++kSTt3bvXytAAAAAAAAAIEUsriI4ePeq9n5SU5L1f/YpeZ511lvf+0KFDveVmniubAQAAAAAAILwsBUQ22w+H5+Xlee9XD4h69+7tvd+iRQvv/ZM3rwYAAAAAAEB4WAqIWrVqJalqr6GVK1fK5XJp//793hIzSerbt6/3/okTJ7z3U1NTrQwNAAAAAACAELG0B9Gpp56qgwcPyjAMHThwQP369VNpaakqKiokVa0wGjRokLf/rl27JFWtHkpPT7cyNAAAAAAAAELE0gqisWPHeu+bpqk9e/YoJydHUlUINGLECO+l20zT1KZNm7ylZf3797cyNAAAAAAAAELEUkB06623ejenNgzD5yZJv/zlL719165d67NP0dlnn21laAAAAAAAAISIpYCoffv2+s9//qPU1FSZpulz++Uvf6mrrrrK2/df//qXJHmvYlZ99REAAAAAAADCx9IeRJI0YcIE7dy5U4sXL9bevXuVnJysCy64QAMGDPDpd+ONN2ry5MmSqlYbUWIGAAAAAAAQGSwHRFLV1cxuvPHGWvtQUgYAAAAAABCZLAVEDz30kPf+iBEjNG7cuFr7l5SUqKSkxPu4devWVoYHAAAAAABACFgKiB544AGfDanrCohmzZqlp59+WlJVmVl5ebmV4QEAAAAAABAClkvMTNP0hkTB9gcAAAAAAEDksHQVM0n1CodYMQQAAAAAABB5LAdE9bF9+/amHA4AAAAAAABBqFeJWfVNqU/25Zdf1vh8RUWFMjMztXz5chmGIdM05XQ66zdTAAAAAAAANIp6BUTVN6X28OwptGbNGq1Zs6bW46vvV9S+ffv6DA0AAAAAAIBG0qBNqgNtNB3M5tOe1UOG
YWjEiBENGRoAAAAAAAAh1qCAyLMKqHooVJ/Nqm02m371q181ZGgAAAAAAACEWL03qTZN03urqb22W7du3fTGG2/o3HPPDdmLAAAAAAAAQMPVawXR3LlzvfdN09SMGTO8K4cuuugiXX/99QGPs9vtSkpKUp8+fTRgwIB6rTYCAAAAAABA46pXQDRt2jSfxzNmzPDeHzBggN/zAAAAAAAAiHwN2oPIY9SoUd7VQL179w7JhAAAAAAAANC0LAVEK1euDNE0AAAAAAAAEC6WAqJANm7cqNWrV+vQoUMqLCzUqFGjdNlll4V6GAAAAAAAAIRIyAKif//733rggQe0Y8cOn3bTNDVp0iQNHz5cR48elSRdeOGFevHFF0M1NAAAAAAAACyo92XuT2aapm699VZNnTpV3333nc8l7b2D2GwaNWqUdu/erd27d+uf//ynjh8/bnVoAAAAAAAAhIDlgOgPf/iDXn75ZZmmKcMwvLeTXXPNNZIkwzBUVlamDz74wOrQAAAAAAAACAFLAdGOHTv0+OOP+4RC1VcOVXf22WerVatW3scrVqywMjQAAAAAAABCxFJA9MILL6i8vFxSVTCUlJSkW2+9NWBIZBiGzjjjDO9zmzZtsjI0AAAAAAAAQsRSQPTRRx/JMAyZpimn06nPPvtML7zwgiQFLDPr3r27pKowadeuXVaGBgAAAAAAQIhYCoj27NkjqSoMuvjii3X66afX2j8pKcl7Pz8/38rQAAAAAAAACBFLAVFhYaH3fpcuXers77nMvRR4hREAAAAAAACanqWAqPqm057VRLX55ptvvMFQ9WMBAAAAAAAQPpYCou7du8s0TZmmqeXLl2vnzp019n3vvfe0detWSVWrh0499VQrQwMAAAAAACBELAVEo0aNklQV+JSVlWn06NHeTao9Dhw4oEceeUTXXnutd0NrSRo5cqSVoQEAAAAAABAihhnomvRB2rp1q0477TTvY9M0fUIg6Ye9hqo/Z7fblZWVpZ49e1qYOsKloKBAqampys/PV0pKSrinAwDNgsvl0uOLNsiw2fS7SwcpPj4+3FMCAABACEX6d2lLK4j69++vadOmeQOhk8MhSd4SNE9QZBiGZsyYQTgEAAAAAAAQISwFRJL03HPPafjw4T4hUaCbVBUWDR8+XE899ZTVYQEAAAAAABAilgOiFi1aaOXKlZo5c6YcDod3xdDJN7vdrttvv10ff/yxWrRoEYq5AwAAAAAAIAQcoThJfHy8nnnmGd1zzz1atGiR1qxZoyNHjkiS2rdvr7POOkuTJk1Senp6KIYDAAAAAABACIUkIPLo3Lmzbr/9dt1+++2hPC0AAAAAAAAakeUSMwAAAAAAAEQ3AiIAAAAAAIAYZ6nEbMaMGQ06Lj4+Xu3atdOgQYM0duxYpaamWpkGAAAAAAAALLAUEM2bN897CfuGatGihX7961/r/vvvl8MR0i2RAAAAAAAAEISQlJjVdGn7YG7FxcX685//rIkTJ8rtdodiOgAAAAAAAKiHkAREhmFYupmmqeXLl+u+++4LxXQAAAAAAABQD5YDIs9KoJMfn3yr6XlJ3pBozpw5OnjwoNUpAQAAAAAAoB4sBUQrVqzQ4sWL1aNHD2/gk5GRoWeeeUYLFy7UO++8o2effVZjxozxhkEDBw7UkiVL9Oqrr+qKK67wCY8qKir02muvWXtFAAAAAAAAqBfDrJ7QNMAll1yiDz74QDabTW+88YauuuqqgP3eeustXX311ZKkKVOm6L///a8k6bHHHtO9997r3ez6kksu0bvvvmtlSmhkBQUFSk1NVX5+vlJSUsI9HQBoFlwulx5ftEGGzabfXTpI8fHx4Z4SAAAAQijSv0tbWkH02muv6YMPPpBhGLroootqDIck6corr9S4ceNkmqYWLFigN954Q5L0u9/9Th07dpRUVX62efNmK1MCAAAAAABAPVkKiObOneu937Nnzzr7V+/z4osvVk3AZtP555/vLTU7duyY
lSkBAAAAAACgniwFRJs3b/aWhm3YsKHO/ps2bZJUtVLIc1+SOnTo4L3vcrmsTAkAAAAAAAD1ZCkgKioqklQV+KxZs0b/93//p/Ly8oB9n376aX322WfeQMlzrCS53W7v/datW1uZEgAAAAAAAOrJYeXg7t27Kysry3uZ+gceeEAvvPCCJk6cqPT0dLVo0UIHDhzQ8uXLtWXLFknylpJ1797de549e/ZIqrrcffv27a1MCQAAAAAAAPVkKSC64oor9Mgjj8gwDG9IlJ2drZdeesmnnycU8vQxDENXXnmlpKpL269du9a7sqhPnz5WpgQAAAAAAIB6slRidvfdd6tTp07ex9WDouo3z3MenTt31l133SVJWr58uY4fP+7tN3r0aCtTAgAAAAAAQD1ZWkGUkpKiZcuW6aKLLtLBgwe9IVD1MKg60zTVqVMnLV26VMnJyZKkr776ShkZGd4+npVFAAAAAAAAaBqWVhBJ0oABA7R582bdeuutio+P91s95LnFx8frtttu06ZNm9S/f3/v8X/4wx+0YsUK7+2UU06xOiUAAAAAAADUg6UVRB6tWrXSCy+8oEcffVQff/yxNm7cqKNHj0qS2rZtq8GDB2vMmDFq1apVKIYDAAAAAABACIUkIPJo1aqVpkyZoilTpoTytAAAAAAAAGhElkrM7Ha795aamqri4uJQzQsAAAAAAABNxFJA5HA4vHsMjRkzRomJiaGaFwAAAAAAAJqIpYCoY8eO3kvbd+7cOVRzAgAAAAAAQBOyFBCNGjVKpmlKkg4cOBCSCQEAAAAAAKBpWQqI7rjjDtntdpmmqQ8//FB79+4N1bwAAAAAAADQRCwFRGeffbYee+wxSVJpaanGjh2rzz//PCQTAwAAAAAAQNOwdJn7GTNmSJI6d+6sAwcOaMeOHRo1apR69Oih008/XWlpaTIMI+CxhmHo5ZdftjI8AAAAAAAAQsBSQDRv3jxvAGQYhveKZt9//7127dpV43GmaRIQAQAAAAAARAhLAZGHZ6Pq6quFPG0AAAAAAACIbCEJiAKVkdVUWiYRHgEAAAAAAEQSywERYQ8AAAAAAEB0sxQQrVixIlTzAAAAAAAAQJhYCogyMjJCNQ8AAAAAAACEiS3cEwAAAAAAAEB4ERABAAAAAADEuJBcxay6PXv2KDc3V8XFxXVuYD1q1KhQDw8AAAAAAIB6CklAtGvXLj388MNauHChjh8/HtQxhmGovLw8FMMDAAAAAADAAssB0aeffqrJkyeroKCAS94DAAAAAABEIUsBUVFRkW644Qbl5+dLqloVFAyCJAAAAAAAgMhhKSD697//rQMHDvgEQ4HCH8/zBEMAAAAAAACRx1JAtGTJEp/HPXv2lNPp1LZt2yRVBUPXXnut1q1bpx07dsgwDJ1zzjnq06ePlWEBAAAAAAAQQpYuc//tt9/KMAyZpqmUlBStXr1aF198sU+f119/XVlZWfrFL34h0zS1ZcsW3XjjjZo7d66liQMAAAAAACA0LAVER44ckVS1UmjkyJFq27ZtwH6GYeiJJ55Qr169lJ+fr2uuuUa7d++2MjQAAAAAAABCxFJAVFxc7L3fpUuXqhPafE9ZVlYmSbLb7TrvvPMkSfn5+XryySetDA0AAAAAAIAQsRQQJScne+87HFXbGSUmJvr02bdvn99xpmn67V8EAAAAAACA8LAUELVu3dp7ZbJjx45526Qfrlz20ksvSZKOHj2qDz74wNt+4MABK0MDAAAAAAAgRCwFRNX3HDp69Kgk+VyhzDRNPfbYYxo0aJD69eunw4cPe59zOp1WhgYAAAAAAECIWAqITjvtNO/9PXv2SJLOO+882e12SfJe4WzTpk06evSo97FhGBo8eLCVoQEAAAAAABAiQQdEe/fu9d7y8vIkSWeddZakqpVCWVlZOnjwoFq1aqXJkyd7S888JWWe/3r8+Mc/DsX8AQAAAAAAYFHQAVH37t3Vo0cP9ejRQw899JAk6dxzz9Ww
YcM0bNgwDR06VFu2bJEk/fWvf1Xbtm39QiLP42uvvVZTp04N6QsBAAAAAABAwzjq09lTHuZx2mmnae3atX79evTooTVr1uiuu+7S4sWLVVJSIqkqZLrjjjv0q1/9ytqsAQAAAAAAEDL1CohOLhOrTY8ePfT//t//U0VFhXJzcxUXF6dWrVrVe4IAAAAAAABoXPUKiBrCbrerQ4cOjT0MAAAAAAAAGsjSVcwAAAAAAAAQ/QiIAAAAAAAAYlyDSsy+/PJL75XMrPjjH/9o+RwAAAAAAACwpt4BkWmaWrNmjdasWWN58FAFRMXFxZo3b54WLlyorVu36siRI0pKSlKXLl108cUXa/r06erXr19IxgrlmOXl5fr222+97+eaNWuUlZUl0zR9+u3atUvdu3ev9Vzz5s3T9OnT6/Uarr32Wr3xxhv1OgYAAAAAADQ/DVpBdHKA0RD1uSJabVauXKlp06Zp7969Pu0ul0tHjx7VN998oyeffFJ33323HnrooZCMG6oxH374YT3wwAOW5wMAAAAAAGBFgwIiqyFLKAImSVqxYoUmTpyokpKSWvuVlZXpT3/6k44fP65nn302YsYM1fsAAAAAAABgRdhWEFlVUFCgqVOn+gU1ffv2VUZGhvbu3atly5apsrLS+9xzzz2ncePGafLkyRE5Znp6uk6cOKG8vLwGza+6zp0766qrrqq1z9ChQy2PAwAAAAAAol+9AyLDMNSpUyf17t27MeYTtCeeeELZ2dk+bWPHjtXixYvldDolSXPnztWMGTN8+tx5552aNGlSg1ZBhXrMvn376q677tKIESM0YsQInXLKKRo9erRWrVpV77mdrHfv3nrqqacsnwcAAAAAADR/DVpBdPXVV2v27NmhnkvQTNPUvHnz/NofffRRb1AjSdOnT9cTTzyhLVu2eNt27typVatWafTo0WEf8/rrr9f1119fr3kAAAAAAACEWoMConDbvHmz3wbRrVq10rBhw/z6jh071ieskaT33nuv3gFROMa0Ij8/X6+++qq+//57lZSUqFWrVurbt69Gjhypdu3aNdk8AAAAAABA5IvKgGj9+vV+bX379g3Yt3///n5tGzZsiIoxrdi4caNuvvlmv3aHw6Grr75af/nLX9S5c+cmnRMAAAAAAIhMtnBPoCF27Njh19ahQ4eAfdu3bx/U8ZE4ZmMoLy/Xv//9bw0ePDhg6AUAAAAAAGJPVAZE+fn5fm0tW7YM2DcxMTGo4yNxzMaUm5urSZMmBTUvl8ulgoICnxsAAAAAAGg+6hUQmaYZEZe4Ly4u9mtzOAJXy1XfQNqjsLAwKsasrw4dOujnP/+53n33Xe3bt08ul0v79+/XwoULddppp/n1z87O1pNPPlnneR955BGlpqZ6b+np6Y0xfQAAAAAAECZB70F0//33e++PGDGiUSYTrEArdMrKygL2DdSelJQUFWPWx6RJk3TjjTf6hVOdO3dW586dNX78eI0aNUpfffWVz/MLFizQAw88UOu57733Xt15553exwUFBYREAAAAAAA0Iw0KiMItNTXVr62oqChg30DtgY6PxDHro02bNrU+Hx8fr1mzZunyyy/3ad+8ebPcbrfi4uJqPTY+Pj4U0wQAAAAAABEoKvcg6t27t19bTk5OwL6HDx8O6vhIHDPUAl1dTZKOHj3axDMBAAAAAACRJCoDomHDhvm1ZWVlBey7detWv7ahQ4dGxZihdvz48YDtNW22DQAAAAAAYkNUBkQDBw5Ut27dfNry8vK0bt06v77Lly/3a5s4cWJUjBms7OxsrV69us5+CxYs8Gtr27atUlJSGmNaAAAAAAAgSkRlQGQYhqZNm+bXPmvWLJ8NoufOnavMzEyfPr169VJGRoZP2+jRo2UYhs9t9+7djTpmKB0+fFjnnnuurrjiCq1cudLvSnOVlZV6/vnnNXv2bL9jL7nkkkabFwAAAAAAiA5Bb1IdaX7zm9/opZdeUnZ2trftww8/1Omnn66M
jAzt27dPS5cu9Ttu9uzZMgwjIsb86quv9Prrr/u07dixw6/fQw895LPKp3fv3vr5z3/u12/hwoVauHCh2rVrp3POOUedOnXS0aNHtX79eu3cudOvv8Ph0D333FP7iwYAAAAAAM1e1AZEKSkpmj9/viZOnKiSkhJve1ZWVo17A82cOVOTJ0+OmDEzMzM1Z86cOsedO3euz+OMjIyAAZHHkSNHtGjRojrP+9xzz9W4cTUAAAAAAIgdUVli5jFmzBi9//77Sk9Pr7Wf0+nUfffdp2eeeSYqx6xLamqqunTpEnT/9u3b66233tJtt93WiLMCAAAAAADRImpXEHmMGTNG27Zt09y5c7Vw4UJlZmYqNzdXSUlJ6tKli8aNG6dbbrlF/fr1i+oxa9OjRw/t2bNHn3/+uZYvX66vvvpK3333nXJyclRSUqLExES1b99eQ4cO1YQJE3TdddepRYsWTTI3AAAAAAAQ+Qzz5B2NA2jdurX3/u23366HH35YUtXeOB4jRozQuHHjGmGKiDQFBQVKTU1Vfn4+V0ADgBBxuVx6fNEGGTabfnfpIMXHx4d7SgAAAAihSP8uHdQKory8PElVV/KqvvfOAw884N18+Ze//CUBEQAAAAAAQBQKeg+imq78FcQCJAAAAAAAAESwem9SnZOT4/O4oZeMBwAAAAAAQGQIqsQsMTFRJSUlMk1TCxcu1J///Ge/q2ZlZmbqtddeq9fgP/rRj+rVHwAAAAAAAKEX1CbVffv21Y4dOyRVlZR5Vg1VP7QhK4kqKirqfQzCL9I31gKAaMQm1QAAAM1bpH+XDqrE7Pzzz/eGQYZhyDRNv72HPG3B3gAAAAAAABAZggqI7rjjDjkcP1SjGYbht2LI0xbMDQAAAAAAAJEjqIBo8ODBeuWVV5SUlFTjKiBWDwEAAAAAAESnoDaplqSpU6fq0ksv1bJly7Rr1y6VlpbqwQcf9K4IGj58uC6++OJGmygAAAAAAAAaR9ABkSSlpaXpmmuu8T5+8MEHvfdHjBih+++/P3QzAwAAAAAAQJOoV0B0sq5du3pXELVu3TokEwIAAAAAAEDTshQQ7d69O0TTAAAAAAAAQLhYCogCOXTokNauXavDhw9Lktq3b6+zzjpLHTt2DPVQAAAAAAAACIGQBUQffPCBHnjgAa1duzbg82effbYeeOABNrIGAAAAAACIMEFd5r4u9957ryZOnKi1a9fWeGn7NWvW6JJLLtGsWbNCMSQAAAAAAABCxHJA9PTTT+uxxx7zBkGGYdR4M01Tjz32mJ555plQzB0AAAAAAAAhYCkgys3N1b333usTAknyWz3k4QmJ7r33Xh09etTazAEAAAAAABASlvYgeumll1RSUuITDMXFxemss85Senq6JGnfvn1au3at3G63t19JSYlefPFF3XPPPRanDwAAAAAAAKssrSD66KOPJMm7Sujqq6/W/v379emnn+r111/X66+/rk8//VQHDhzQVVdd5S1Bq34sAAAAAAAAwstSQJSZmekNfHr16qXXX39dbdu29evXpk0bvf766+rVq5ekqkBp69atVoYGAAAAAABAiFgKiI4dOyapam+hCy64QHa7vca+DodDF1xwgXe1kedYAAAAAAAAhJelgMjh+GELo9LS0jr7u1wu7/3awiQAAAAAAAA0HUsBkaeczDRNLVmyRAUFBTX2zc/P1+LFi70laYFK0QAAAAAAAND0LAVEQ4YM8W48nZubqwsuuECff/65X7/PP/9cF1xwgXJzc739hwwZYmVoAAAAAAAAhIily9xPmDBBCxculFS1imj9+vUaNWqUkpOT1aVLF0nS/v37deLECUlVexV59iC65JJLrAwNAAAAAACAELG0guimm25S+/btJclbOmaapgoKCpSZmanMzEwVFBTINE1vMCRJ7du310033WRlaAAAAAAAAISIpYAoISFBr7zyivexYRjeW6A20zRls9k0d+5cxcfHWxkaAAAAAAAAIWIpIJKq
SsXmz5+v+Ph4n1VCJwdFpmkqISFB8+fP1/jx460OCwAAAAAAgBCxHBBJ0vXXX6/MzEz9+Mc/Vtu2bb0lZZ5b27ZtdeuttyozM1PXXXddKIYEAAAAAABAiFjapLq67t276x//+If+8Y9/aPfu3Tpy5IgkqV27durevXuohgEAoNkxTVMul0uSWWdfAAAAoDGELCCqrnv37oRCAAAEye1264n3Nsqw2eSw2cM9HQAAAMSgkJSYAQAAaxzOuHBPAQAAADGMgAgAAAAAACDGERABAAAAAADEOAIiAAAAAACAGEdABAAAAAAAEOMIiAAAAAAAAGIcAREAAAAAAECMIyACAAAAAACIcQREAAAAAAAAMY6ACAAAAAAAIMYREAEAAAAAAMQ4RyhOUl5ervfee0+ffPKJdu3apRMnTqiysrLWYwzD0EcffRSK4QEAAAAAAGCB5YDoiy++0NSpU7Vnz56gjzFNU4ZhWB0aAAAAAAAAIWApINq+fbsuvvhiFRcXyzTNoI4hGAIAAAAAAIgslgKihx9+WEVFRTIMg+AHAAAAAAAgSlkKiD7++GOfYCjYVUQAAAAAAACIHJYCoiNHjkj6YU+hq6++WrfddptOOeUUJSQkyG63h2SSAAAAAAAAaDyWAqLWrVsrJydHhmHonHPO0X/+859QzQsAAAAAAABNxGbl4BEjRnjLygYNGhSSCQEAAAAAAKBpWQqIfvrTn3rvr1u3zvJkAAAAAAAA0PQsBUTjxo3TtGnTZJqm1q1bpz/+8Y8qLy8P1dwAAAAAAADQBCztQfTaa69p5MiRWrJkiQ4fPqyHH35Yr7zyiq666ip169ZNbdq0qfX4H/3oR1aGBwAAAAAAQAgYpoVr09tstoCXua/eVpuKioqGDo0wKigoUGpqqvLz85WSkhLu6QBA1HO5XHp80QZVVJTL4YzT7y4dpPj4+HBPCwAAACEU6d+lLa0g8jg5GAomcwo2RAIAAAAAAEDjCklAdHLYU1f4Y2HREgAAAAAAAELMckBE2AMAAAAAABDdLAVE999/f6jmAQAAAAAAgDAhIAIAAAAAAIhxtnBPAAAAAAAAAOFFQAQAAAAAABDjQnIVM48PPvhACxcu1Nq1a3X48GFJUvv27XXWWWfp8ssv1/jx40M5HAAAAAAAAEIgJAHRvn37dO2112rNmjWSfK9sduDAAW3cuFEvvviihg8frv/85z9KT08PxbAAAAAAAAAIAcslZnv37tXZZ5+tNWvWyDRNbzhkGIYMw5Akb/uXX36p4cOHa+/evVaHBQAAAAAAQIhYDoiuu+465eTkSPohFPIEQ4HaDh06pBtuuMHqsAAARD3TNOVyueRyuSSZdfYHAAAAGoulErNly5bpyy+/9AmEqpeXVecJiUzT1OrVq7Vs2TKNGzfOyvAAAEQ1t9ut2Uu2qLzMrYqKinBPBwAAADHM0gqi//73v977pmkqPj5ev/rVr/Thhx9q27Zt2rZtmz788EP96le/UkJCQo3HAgAQqxzOODmcceGeBgAAAGKcpRVEq1ev9q4KiouL08cff6wRI0b49Dn11FN14YUX6pprrlFGRobKy8u9q4gAAAAAAAAQfpZWEB08eFBSVfnY+PHj/cKh6kaMGKEJEyZ4S9A8xwIAAAAAACC8LAVERUVF3vtdunSps3/nzp0DHgsAAAAAAIDwsRQQpaameu9v2bKlzv7V+1Q/FgAAAAAAAOFjKSDq1q2bTNOUaZr65JNP9M9//rPGvq+++qo++eQT7xXPunXrZmVoAAAAAAAAhIilTapHjhypr7/+2rtR9c0336xXX31VEydOVHp6uiRp3759Wrx4sT7++GNvP8MwNHLkyJC8AAAAAAAAAFhjKSC6+eab9dRTT0mSN/xZsWKFVqxY4dfXEwxVPxYAAAAAAADhZ6nE7IwzztB1113nvTKZJyQKdPOEQ4Zh6Prrr9cZZ5xhffYAAAAAAACwzFJAJEn/+Mc/NHToUJ+QKNBNqlpFNHTo
UL3wwgtWhwUAAAAAAECIWA6IkpKS9Omnn+rWW2+V3W6vcQWR3W7Xbbfdpk8++UQtW7YMxdwBAAAAAAAQApb2IPJo0aKFXnjhBd1333169913tXbtWh05ckSS1K5dO5111lmaNGmSunbtGorhAAAAAAAAEEIhCYg8unbtqpkzZ4bylAAAAAAAAGhklkvMAAAAAAAAEN0IiAAAAAAAAGJcUCVmM2bM8N4fN26crrvuOr/2+jIMQy+//HKDjwcAAAAAAEBoBBUQzZs3z3up+rS0NG9AVL29PkzTJCACAAAAAACIECHZpNo0zVCcBgAAAAAAAGEQkoCovquICJQAAAAAAAAiR9ABUU2hDmEPAAAAAABAdAsqIFqxYoX3fpcuXQK2AwAAAAAAIDoFFRBlZGTUqx0AAAAAAADRwxbuCQAAAAAAACC8LG1SfcEFF3jvX3XVVfrZz35Wa/9Nmzbpu+++8z6+8sorrQwPAAAAAACAELAUEK1cudJ7BbPBgwfX2f+VV17R008/Lanqymfl5eVWhgcAAAAAAEAIhOQy9/XBVc8AAAAAAAAiS5PuQVRaWtqUwwEAAAAAACAITRYQVVRU6Msvv/SWpAEAAAAAACAy1KvErPqm1Cd78803tXHjxoDPVVRUaMeOHTp06JC3rUWLFvUZGgAAAAAAAI2kXgFR9U2pPTx7Ch04cEAHDhyo8VhPP8/xXbp0qddEAQAAAAAA0DgatEl1oI2m69p82hMMmaYpwzA0duzYhgwNAAAAAACAEGtQQFQ97Dm5LRitWrXSXXfd1ZChAQAAAAAAEGL13qTaNE3vrab2mm6tW7fWTTfdpK+++kpdu3YN2YsAAAAAAABAw9VrBdGuXbu8903TVM+ePb0rh6ZPn64//OEPAY+z2+1KSkpSWlpaw2cKAEAzYpqmXC6XpNpLtAEAAICmUK+AqFu3bn5tnpVEKSkpAZ9vCsXFxZo3b54WLlyorVu36siRI0pKSlKXLl108cUXa/r06erXr1/EjVleXq5vv/1Wa9as8d6ysrL8Vmft2rVL3bt3D3pu27Zt09y5c7V06VLt379fhYWFateunQYMGKDLLrtMN998sxITExvysgEAIeJ2u/XEexvVIikl3FMBAAAAZJh17S5diwcffNB7f8SIEbr44otDMqn6WLlypaZNm6a9e/fW2MfpdOruu+/WQw89VK+9khp7zAcffFAPPPBAneMFGxCZpqnf//73evzxx1VeXl5jv27duum1117TqFGj6jxnIAUFBUpNTVV+fr5SUvhiAwAN4XK59PiiDXLGJ8isrJSrtFiS5HDG6XeXDlJ8fHyYZwgAAIBQivTv0g3apNrj/vvvD9U8GmTFihWaOHGiSkpKau1XVlamP/3pTzp+/LieffbZiBnTQjYX0O23364XXnihzn579uzRhAkTtGTJkgaHRAAAAAAAoPmwFBCVlJRo7dq13sfJyckaMmRIwL4bNmzQiRMnvI/POusstWjRosFjFxQUaOrUqX5BTd++fZWRkaG9e/dq2bJlqqys9D733HPPady4cZo8eXJEjpmenq4TJ04oLy+v3nNbsGCBXzhks9k0btw4de3aVatWrVJWVpb3ueLiYk2dOlWZmZlKSkqq93gAAAAAAKD5qPdVzKpbtGiRxowZ472tWrWqxr6rV6/26bto0SIrQ+uJJ55Qdna2T9vYsWO1adMmvfDCC1qyZIleeuklv+PuvPPOBq/cCfWYffv21V133aW33npL2dnZ2rt3rwYNGlTveZmmqd/+9rd+7S+//LKWLFmiF154QZs2bdLYsWN9nt+3b5+efPLJeo8HAAAAAACaF0sB0dKlS72XsE9OTtZPfvKTGvvecsstSktL8wYlS5cubfC4pmlq3rx5fu2PPvqonE6n9/H06dM1cOBAnz47d+6sNchqyjGvv/56PfbYY7riiit0yimn1HtOHh9//LF2797t03baaafp5ptv9j52Op165JFH/I6dO3duyEvd
AAAAAABAdLEUEH399deSJMMwNHLkyFpLxuLj4zVy5Ejv4/Xr1zd43M2bN/ttEN2qVSsNGzbMr+/Jq2Yk6b333ouKMYO1ePFiv7aLLrrIr+3MM89UWlqaT9uuXbu0ZcuWxpoaAAAAAACIApYCokOHDnmv0BXMJe67dOkiqWo1zsGDBxs8bqBwqW/fvgH79u/f369tw4YNUTFmsOozt379+vm1NebcAAAAAABA5LMUEFXfTLm4uLjO/tU3d27IRsweO3bs8Gvr0KFDwL7t27cP6vhIHDNYkTw3AAAAAAAQ+SxdxSwlJUXHjh2TaZpavXp1nf0///xzGYYh0zSVkpLS4HHz8/P92lq2bBmwb2JiYlDHR+KYwWrsublcLrlcLu/jgoKCes4QAAAAAABEMksriDp37uy9v3379oCbOHu88sor2r59u/dxp06dGjxuoNVKDkfgrKv6BtIehYWFUTFmsBp7bo888ohSU1O9t/T09IZNFAAAAAAARCRLAdE555wj0zS9q4Juu+02zZo1S3v27PH22bNnj2bNmqWf/vSn3n6GYeicc85p8LiBVsGUlZUF7BuoPSkpKSrGDFZjz+3ee+9Vfn6+97Zv376GTRQAAAAAAEQkSyVmU6ZM0QsvvCCp6kpm5eXleuyxx/TYY48pLi5OhmF4S5M8wVD1YxsqNTXVr62oqChg30DtgY6PxDGDlZqaqhMnTtQ5h5ra65pbfHy84uPjGz5BAAAAAAAQ0SytIBo7dqzOPPNM72PPCiHTNOVyuVRaWup97AmHDMPQ0KFDA16GPVi9e/f2a8vJyQnY9/Dhw0EdH4ljBiuS5wYAAAAAACKfpYBIkubPn+9TomQYRsCbVLWKKCkpSfPnz7c05rBhw/zasrKyAvbdunWrX9vQoUOjYsxgBZrbtm3bAvYN1N6YcwMAAAAAAJHPckB06qmnatWqVercubN3tdDJPO2dO3fWihUr1LdvX0tjDhw4UN26dfNpy8vL07p16/z6Ll++3K9t4sSJUTFmsC655JKg5rB27Vq/K5b16NFDAwYMaLS5AQDqx7MK1+VyBfw7FQAAAGgMlgMiSRo8eLCysrL0+OOPa/jw4bLZbN5QyGaz6eyzz9Zf/vIXbd++PSSrVQzD0LRp0/zaZ82a5bMJ89y5c5WZmenTp1evXsrIyPBpGz16tN+Kp927dzfqmKE0ZswYde/e3adty5YtPleVKysr06xZs/yOnT59us/eUACA8KooL9PTH27T7CVb5Ha7wz0dAAAAxAjDbIR/nqysrNTRo0clSW3atJHNFpIcykdBQYH69++v7Oxsn/a+ffsqIyND+/bt09KlS1VZWenz/DvvvKPJkyf7tI0ePVqrVq3yadu1a5df6BLKMSXpq6++0uuvv+7T9uabb+rAgQM+bdOnT1dKSor3ce/evfXzn//cp8+CBQt01VVX+bTZbDZdfPHFSk9P16pVq/xK4tLT05WZmVnvK6wVFBQoNTVV+fn5PvMCAATP5XLp8UUb5IxPkFlZKVdpsfe5hMQk2e0O/WJsHy4SAAAA0ExE+ndpS1cxq4nNZlO7du0a49ReKSkpmj9/viZOnKiSkhJve1ZWVo17A82cOTNgUBOuMTMzMzVnzpw6x507d67P44yMDL+AaMqUKfrJT37ivaqcVBXULVmyJOA5ExMT/faPAgAAAAAAsSn0S3ua0JgxY/T+++8rPT291n5Op1P33XefnnnmmagcM1jPP/+87r33Xjkcted+Xbt21eLFizVq1KgmmhkAAAAAAIhkIV1BtGnTJq1fv165ubkqLi6uc3PNP/7xj5bHHDNmjLZt26a5c+dq4cKFyszMVG5urpKSktSlSxeNGzdOt9xyi/r162d5rHCOGQzDMPTnP/9ZN910k1555RUtW7ZM+/fvV2Fhodq2basBAwbo8ssv1/Tp05WYmNikcwMAAAAAAJErJHsQvfXWW7rnnnu0c+fOeh1XUVFhdWiEQaTXTQJANGAPIgAA
gNgS6d+lLa8g+tvf/qY77rhDkup1OV6unAUAAAAAABAZLAVEBw4c0G9/+1uZpum9PHwwGuHCaQAAAAAAAGggSwHRvHnzVFpa6g2Gagt+DMMgGAIAAAAAAIhAlgKiTz75RJK8K4imTJmi/Px8LV++XFJVKPT888/ryy+/1GuvvSZJGjdunK677jqL0wYAAAAAAECoWLrMfWZmpnf1UJcuXfTvf/9bAwcO9Olz22236ZVXXtG//vUvmaapZcuWKScnR9OmTbMyNAAAAAAAAELEUkB0/PhxSVUrhc455xw5HDUvSLr22ms1fPhwmaapWbNm6fPPP7cyNAAAAAAAAELEUkBUVlbmvd+uXTtJ8guJioqKvPfPOOMMSVUlaU8//bSVoQEAAAAAABAilgKiVq1aee9XVlZKklq2bOnTJysry3v/6NGjkqoCotWrV1sZGgAAAAAAACFiOSDyXJnME/506NBBkrx7Ez344IM6fvy4VqxYocWLF3vbDx8+bGVoAAAAAAAAhIilq5h17NjRu0IoNzdXktS/f3/v86Zp6r333lPbtm29jz0BUVpampWhAQAAAAAAECKWVhANGTLEe3/btm2SpHPPPVdJSUmSqlYRmabpvXkeG4ah8847z8rQAAAAAAAACBFLAdFZZ50lqWpl0MGDB5WZmam4uDjdeuut3tIzwzC8N++gNpt++9vfWhkaAAAAAAAAIWKpxCwjI0O/+c1vvI9LSkokSQ899JC++OILrVmzRtIP+xGZpimbzaYnn3xS55xzjpWhAQCNxDRNud1uxcXF+YT7AAAAAJovSwFRp06d9Je//MWvvWXLlvr000/1wgsvaNGiRdq7d6/i4uI0bNgwzZw5U8OGDbMyLACgEbndbj2+aIPumjxE8fHx4Z4OAAAAgCZgKSCq9cQOh2bOnKmZM2c21hAAgEbicMaFewoAAAAAmpClgOiCCy7w3rfb7XrnnXeUmJhoeVIAAAAAAABoOpYCok8++cS7GXVGRgbhEAAAAAAAQBSydBWzdu3aeQOiU089NSQTAgAAAAAAQNOyFBANGjTIe7+oqMjyZAAAaCqmacrlcsnlcnn/sQMAAACIVZYCohkzZnjvf/jhhyouLrY8IQAAmoLb7dbsJVs0e8kWud3ucE8HAAAACCtLAdE111yjKVOmyDRNHTlyRDfeeKMKCgpCNTcAABqVwxnHFdsAAAAAWdyk+rXXXtP48eO1Zs0a7d+/X4sWLVLPnj112WWX6YwzzlBaWpoMw6jx+B/96EdWhgcAAAAAAEAIWAqIbr75Zm8AZBiGTNPUsWPHNG/evKCOJyACAAAAAAAIP0sBkYdnc09PWBTMZp+1rSwCAKCpeDarjouL4+8mAAAAxCxLexB5GIbh8z/Vnsc13QAAiBQV5WWavfhbNqoGAABATLO8gohLAwMAoh0bVQMAACDWWQqI7r///lDNAwAAAAAAAGFCQAQAAAAAABDjQrIHEQAAAAAAAKJX0CuI7rzzTu/9jIwMXXbZZT7Pu1wuuVwu7+OUlJQQTA8AAAAAAACNLeiA6KmnnvK5AtnJAdE999yjp59+WlLVVczKy8tDNEUAAAAAAAA0ppCWmJmm6b0BABAtTNOUy+Xi7y8AAADELPYgAgDEvIryMs1e/K3cbne4pwIAAACEBQERAACSHM64cE8BAAAACBsCIgAAAAAAgBhHQAQAAAAAABDjCIgAAAAAAABiHAERAAAAAABAjHM05KA333xTGzdu9GnbsWOHz+MLLrig1nMYhqGPPvqoIcMDAAAAAAAghOodEJmmqQMHDujAgQO19lm1alWtzxuGUd+hAQAAAAAA0AgatILINM0G9yEYAgAAAAAAiCz1DogIeAAAAAAAAJqXegVEwawcAgAAAAAAQHQJOiC6//77G3MeAAAAAAAACBMCIgAAAAAAgBhnC/cEAAAAAAAAEF4ERAAAAAAAADGOgAgAAAAAACDGERABAAAAAADEOAIiAAAAAACAGEdA
BAAAAAAAEOMIiAAAAAAAAGIcAREAAAAAAECMIyACAAAAAACIcQREAAAAAAAAMc4R7gkAAOBhmqbcbrckKS4uToZhhHlGAAAAQGxgBREAIGK43W7NXrJFs5ds8QZFAAAAABofK4gAABHF4YwL9xQAAACAmMMKIgAAAAAAgBhHQAQAAAAAABDjCIgAAAAAAABiHAERAAAAAABAjCMgAgAAAAAAiHEEREAMM01TLpdLpmmGeyoAAAAAgDAiIAJimNvt1uOLNsjtdod7KgAAAACAMCIgAmKcwxkX7ikAAAAAAMKMgAgAEFCklSA29nwi7fUCAAAATYmACAAQUKSVIDb2fCrKyzR78bcR83oBAACApkRABACoUaSVIDb2fCLt9QIAAABNhYAIAFAvlGIBAAAAzQ8BEQCgXiKt9AwAAACAdQREAIB6oxQLAAAAaF4IiAAAAAAAAGIcAREAAAAAAECMIyACANTIsyE1m1IjWrCJOgAAQMMQEAEAalRRXqanP9ym2Uu2sCk1ogKbqAMAADSMI9wTAABENoczTnY7f10gerCJOgAAQP2xgggAAAAAACDGERABAAAAAADEOAIiAAAAAACAGEdABCDsuFIWAuFqVAAAAEDTISACEHZut1uzl2zhSlnwwdWoAAAAgKZDQAQgIjiccVx5CF6e1UMOpzOofqwyAgAAAKwhIAIARJyK8jLN+WCTKioqau3HKiMAAAAgNAiIAAARKdgVZaw8AwAAAKwjIAIARD1KzQAAAABrCIgAAFGPUjMAAADAGgIiAECzQKkZAAAA0HAERAAAP56SLSl6SrYoMwMAAAAajoAIAOAn2KuIRZKK8jLNXvwtZWYAAABAAxAQAQACisaSrWicMwAAABAJCIgAAAAAAABiHAERAAAAAABAjCMgAhAV2IA4vHj/AQAAgOaNgAhAVHC73Xp80QY2IA4TNoAGAAAAmjcCIgBRgw2Iw4v3HwAAAGi+CIgAAAAAAABiHAERAAAAAABAjCMgAgAAAAAAiHEERAAAAAAAADGuWQRExcXF+tvf/qZx48YpPT1dCQkJatu2rQYPHqy7775b27Zti/gxP/roI91yyy069dRTlZKSopYtW6pnz566/vrr9fbbb9d5/MqVK2UYRr1uI0aMaOjLBwAAAAAAzYgj3BOwauXKlZo2bZr27t3r0+5yuXT06FF98803evLJJ3X33XfroYcekmEYETVmXl6epk2bpkWLFvk9t2vXLu3atUtvvPGGzj//fP373/9Wly5dLM8fACKFaZpyu91yOp0qKyuTaZrhnhIAAAAQk6J6BdGKFSt0ySWX+AU1JysrK9Of/vQn3XHHHRE15okTJ3TRRRcFDIdO9tlnn2nMmDE6dOhQvecMAJHK7Xbr8UUbVFhYqMcXbZDb7Q73lAAAAICYFLUriAoKCjR16lSVlJT4tPft21cZGRnau3evli1bpsrKSu9zzz33nMaNG6fJkydHxJizZs3SunXrfNpatmypyZMnKz4+Xu+++66OHj3qfW7Hjh362c9+prfeeqvOuSYnJ2vGjBm19unevXud5wGAxuZwxvn8FwAAAEDTi9qA6IknnlB2drZP29ixY7V48WI5nU5J0ty5c/1CkjvvvFOTJk1qUKlZKMfcuXOn/va3v/n0S0xM1Jo1azRw4EBJUnZ2ts4880wdPHjQ2+ftt9/Wp59+qpEjR9Y619atW+upp56q92sEAAAAAACxJypLzEzT1Lx58/zaH330UW9QI0nTp0/3hi0eO3fu1KpVq8I+5rx583xWGknSjBkzfI7t1KmT7rzzTr8xX3755XrPHwAAAAAAoCZRuYJo8+bNfnsAtWrVSsOGDfPrO3bsWG3ZssWn7b333tPo0aPDOubixYv9jrvooosCnutk77//fp3zdblceuONN5SVlaWioiKlpqaqZ8+eGjlyJBtdAwAAAAAAH1EZEK1fv96vrW/fvgH79u/f369tw4YNYR2zrKxMmzdvDup8/fr1
82vLzc3Vvn37lJ6eXuN8Dx06pOuvv96v3TAMTZgwQX/9618DzhMAAAAAAMSeqCwx27Fjh19bhw4dAvZt3759UMc35Zh79+4NeKWeQOdLSEhQSkpKUPMJhmmaWrx4sc4880wtWbKkQecAAAAAAADNS1QGRPn5+X5tLVu2DNg3MTExqOObcsyaxm/s11BdcXGxrrnmGu3atavOvi6XSwUFBT43AAAAAADQfERlQFRcXOzX5nAErparvoG0R2FhYVjHDHQuK+fzSE1N1YwZM/Tmm29q165dKi0t1aFDh/Thhx/q/PPPD3iOBx98MOCY1T3yyCNKTU313morbQMAAAAAANEnKvcgCrSipqysLGDfQO1JSUlhHTPQuTzHxcXF1ft8kjR06FBlZ2f7nbtDhw7q0KGDLrzwQl155ZVauHChz/PvvPOOKioqZLfbA85Jku69916fq6kVFBQQEgEAAAAA0IxE5Qqi1NRUv7aioqKAfQO1Bzq+KcesaXwrryElJaXG4Emq2pz6gQce8GvPy8vT999/X+NxkhQfH6+UlBSfG6KXaZpyuVwyTTPcU0EM83wOXS6XJD6LAAAAQLhFZUDUu3dvv7acnJyAfQ8fPhzU8U05ZteuXQOuFAp0vpKSEp04cSKo+dSlpquW5ebm1vtciF5ut1uPL9oQcKN0oKm43W7NXrJFc5ZmqqKiItzTAQAAAGJeVAZEw4YN82vLysoK2Hfr1q1+bUOHDg3rmE6nU6effrpfn23btgXV1rZt2waVeB0/fjxge02bY6P5cjj9A0qgqTmccXwWAQAAgAgRlQHRwIED1a1bN5+2vLw8rVu3zq/v8uXL/domTpwY9jEvueSSoI4L5lwlJSV6//33A0+8mgULFvi12e12v9cFIDY1pOyLkkUAAACgeYjKgMgwDE2bNs2vfdasWT4bOs+dO1eZmZk+fXr16qWMjAyfttGjR8swDJ/b7t27G3XMadOmyWbzffvnzp2rLVu2eB9nZ2frySef9Bvzlltu8Xnscrl06aWXasyYMVq0aFHAco0333xTd999t1/7eeed16A9mQA0Pw0p+6JkEQAAAGgeovIqZpL0m9/8Ri+99JKys7O9bR9++KFOP/10ZWRkaN++fVq6dKnfcbNnz5ZhGGEfs1evXvrZz36mZ5991ttWXFys4cOH67LLLlNcXJzeffddHT161Oe4K664QiNHjgw4v5UrV2rlypVKTU3Vueeeq/T0dJ04cUKbNm3S5s2bAx7zhz/8IejXD6D585R8VVSU+z3nWS108h5qlIkBAAAA0S9qA6KUlBTNnz9fEydOVElJibc9Kyurxr2BZs6cqcmTJ0fMmH/+85/15Zdf+pSpFRUV6fXXXw/Yv3fv3vrb3/5W5zzz8/O1ZMmSOvvNmjVLY8eOrbMfAEhSRXmZZi/+Vndecka4pwIAAAAgxKKyxMxjzJgxev/99+vcsNnpdOq+++7TM888E1FjJicn68MPP9SkSZPqHPe8887TihUr1LFjR7/n4uLi1KdPn7on/z9JSUl6/vnn9fDDDwd9DABIrBYCAAAAmquoXUHkMWbMGG3btk1z587VwoULlZmZqdzcXCUlJalLly4aN26cbrnlFvXr1y8ix0xLS9OiRYu0fPly/etf/9Jnn32mQ4cOqaKiQh06dNDZZ5+ta6+9VldccUWNpXGJiYnavn27vv76ay1btkyrV6/W9u3bdfDgQRUVFSkhIUFt27bVoEGDNHbsWN10001KS0sL2fsBnMw0TbndbsXFxTW4pBMAAAAA0HSiPiCSqgKSmTNnaubMmQ06fuXKlU0+5snGjh1rudxr2LBhGjZsWEjmA1jh2bj4rslDFB8fH+7pAAAAAADqENUlZgAiF6VIAAAAABA9msUKIgCoi6fszel0qqysTJIogYswnqukSWaTHAcAAADgB6wgAhATPGVvhYWFmr1ki2Yv2SK32x3uaaGaivIyzflgkyoqKprkOAAAAAA/ICACEDM8
ZW8OZ1zElMB5Vr+YJqtfpIaXJkbKzxMAAACIVgREABBGnpVNrGYCAAAAEE4ERAAQZqx+AQAAABBuBERAM0cJU2h53s9wvaex+POs/p6HYiPqYN7D6n1i8T2PVmxYDgAA0HAEREAzRwlTaLnd7rBuch2LP0/Pez5naWZINqIO5j2s3icW3/No5Xa79cR7G9mwHAAAoAG4zD0QAyhhCq1wv5/hHj8cPK+5rLxMh/JLday0WIfyS7XvaKG+2l+k8opKmZIMw6UT726VzWaXaZqqNE1VmlKlaaplnEOtW8YpNd6mPQUVWrsnT0eL3IozKiTTlM0wAo558n1ENn5WAAAADUNABMQAT9lFXBxfnGCN1RKeYMvFitwV2p9XptwSt3YtyNTm7BMqdJWr2F2pV9Z9VusYW48cCGouy7/L83kcbzeUkmBXQeVW9euQpEMn3Cp0lSspnr8qAQAA0Pzxf71ADKgoL9Psxd/q3ivOCvdUEOUqyss054NNSkhMatDxbrdbj7z1lSoqypWU2lpS1eqerJxCZR4uVU6hW4eLC1Xo+qFEaHNOic85HDZDHVIS1CElXu2T47TvSIHshilDks1u1/l9OyrO4ZBhSIZhyGYYMgypyFWuo0VuHSko0ab9eUpwOnSwwCVXeaUkyVVh6khRuf779Q8B0ztbVqpb6xayyVRC4vc6I721zuiSpnbJ8Q16/QAAAECkIiACYgRlFwgVq58lhzNOBW5TWfsKdLDApZzCMr2yLtenjyGpVQubOibH6bLBnbVxX4Fa2CuVYDf1+8uHKLFFgiTJ5XLp8UUbVFFR7j33z0f3VHx8zQGOy+XS7CVbNPOC3nrh071yuVwqKCpSabmpwnKbTu3cWtsOFWrt7mMqcldqz7GqgGrOx99L+l6GIZ3ZrZXGn3aKJpzWUZ3SWlh6PwAAAIBIQEAERDHTNL0b58bFxck4aQ+V4I4NvlTIyniwLtqv0HS8yK231+/Tom35OlJU7vNcyzi7UuNtat/Spi6tk9SupUNmWakczjhdf2YnFZSUq7zMrYqKctltof3c2W2GEp02JTql9s44/frC3pKk2Uu26OZRfZSVU6i/r/hO7dNaaltOobbnFGrt7uNau/u4/u+9TA1KT9Mlp3XUhNNOUdc2iSGdGwAAANBUCIiAKOa5upMk3TlhYK2rJgId+8R7G+WMT2iS8WBdQ35m4eYur9TH2w7r7Q379fG2wyqrqAq3DEnpafFKT4tX+5Z2/XHSQD374VZVVJQrITFBZmWlXGXhnbsktW4Zp3N6ttbqrCTvZz47r0QfbD6kDzYf0to9x/TNvjx9sy9PjyzZpgGnpGjCaR116aBO6tG2ZbinDwAAAASNgAiIclbKfRpybDSWqkX7ypvqQvn+e1aENWQ12A/vqXTy+2qapjbuy9Nb6w/o3W+zlVf8Q9Iz4JRkJdsr1D3VrlapKTIrKxtlVVBj6pTWQjPO76EZ5/fQ4ROlWrolRx9sPqgvvz+mzIMFyjxYoNnLt2vi6afoV2P7qHf75HBPGQAAAKgTARGAZi8aV940BbfbrccXbdBdk4fUezVYRXmZnv5wm8zKShk2m6SqYOjzHUf11PLtWrfnuLdv++R4XTGks64Y2lk9WsX77BkU7donJ+imEd1004huOlbk1oeZh/T+pkP6ZPsRvfftQb2/6aAmD+qkX1zYR73aNWxjbwAAAKApEBABiAnRuPKpKVhdgWZWVqq8vExf7Dyq51bt9gZDcQ6bLjmto64c2kXn9W7rXSH0w6qj5qd1yzhde1ZXXXtWV2VmF2jOR9u1dEuO3tmYrXe/ydblgzvrjgv7UHoGAACAiERABKDJsMl15GpIGZ5pmtqf59La/QV6+X9XIYtz2HTD2V11++he6pASuyu2BnRK0Qs3nanNB/L11PLvtHxrjt7acEDv/C8o+sWFvdUxib+CAQAAEDn4v1MATYZNriOXpwyvRVJKnX1N09TBwnJ9szNXOSf+F/g5bLpxeFf9
NCO2g6GTndY5VS9NO1Ob9ufrqeXb9dG2w1qwfr8WbjygKwafooSKSjn5NQAAAEAEICAC0KQo9YpcwfxscgrL9fXBUh0uqpAk2Q2pX/sWen7qmeraru5wKVad3iVVL998ljbuy9NTy7drZdYRvbk+W4lOQxf0salzCr8XAAAACC8CIiDEwllGZeWqVMGc1zT9r1bVGOOhdtWvINZU732xu0Jf7CnWzuNVVySzG9KAji016JSWSrCb6pASWctg6vpshuvKdoPT0zRv+tlat/uYfvv/vtHuo8V6L/OoTu/YUoPa2+WIoqu5AQAAoHmxhXsCQHPjKaOavWSLNyhqyrEfX7Qh5OPWdN7GGg+181xBrCk+Y5Wmqc0HC/Wfb454w6E+rZ2aMiBZ5/VIU8s4e6OO31B1fTYryss054NNqqioaOKZVTmze2u9/dPh6teuKljbdKhI720v1LGS8MwHAAAAYAUR0AjCWUZlZexAqy48Ky0cTmfIx4s2vqtOwrvSw+GMk90emj/Cf1j15rua5nBRub7cX6JjJZWSpDYt7BrRJUHtWob/rw7PnGtbBWR3OOVyuWpcRRTuz25inF3ndU1SjzZJWrnzuPJKK/Xe9kINPSVBw7qF7kpnTbXSL5JWFHp+VyNhLgAAANGCFUQAvAKtuvBsXhyulRaRpLm+Fye/rpKyCn22t1iLvyvSsZJKxTsMjeyRoomntoyIcEj6YaXenKWZNf48KsrLNHvxtxG/wq1b6wRdfUZbpac4VGlK67JL9e6WXJ1wlYfk/E210i+SVhRGy88eAAAgkkTG/+kDiBiBVlWEe6VFJGmu74XDGaey8jJtPlior/YWyF1RtSqnT2unzu3VRgl2Q67S4jDP0pfnZ1FRUXOQEi0/rxZOuy7okajvjpXpqwMlyi5w6z/rD2pw11a68sxuls/fVO9DJL3fkTQXAACAaEBABIRITRs5R6PqmyA3h9cTSKASlEgqkWlqx0rK9enuIuUWV63Gad3CphFdWqh9S4cSnHaZlZU1HtuU5TzVx7JyfFNvTh0MwzB0aps4dUyy67N9Lh0uLNOdb25W5qEizbqkv2xBbGAdy59hAAAAWEOJGRAikVReYVU4N9puKoFKUJrTzzBYFZWmnl21Wwsz85VbXKE4u6Hzu6fo0lOT1D7IcrKmLOexOlY0lAmmxNt12WntdGZ6qiTppc92aebr61VaVvecY/EzDAAAgNAgIAJCqDmVNDiccc3q9QQS6+V06/fm6c1NufrH5/tkSuqa6tC1QzpoYMeWstVz9UlTvm9Wx4qGn7HdZmh49zT9dcppirPbtGTzId3w4pc6Wuiq89hoeH0AAACIPAREQDMWyeU0CJ7n5xiqcr9CV7nuf2ezbnhlnfJKytWmpVMX9EzSmO6JIb1svWmaKi0tVWlpaYPnHuuf4UlndNQ/bzlbqS2cWr83T1c+/4W+P1IY7mkBAACgGSIgApqxaCinQd1CWTa0MuuwLn7yE726eo9MUzq1XQstvG2YerSKD/meNW63W4+89ZUeXbiuwXOvKC/TnA82xfRneHjPNlpw+7nq0qqF9hwt1pXPf6F1u4+Fe1oAAABoZgiIgBCKxNUOnnITz9yqVqJU1nuekfja6iuaX0NdZUO+P1//13esyK1f/2ejbp67VgfyStSlVQu98qMhGtMrTaktnI017ZCUKtbn+OrvQzT+nGvSu32S3v7ZeRqUnqa84jLd8NIavftNdrinBQAAgGaEq5gBIeRZ7ZCU2lp2e2T9elWUl+npD7fJrKxURUW55nywqerLuy24kqJIfm3Bqv4amhvPxuKSdOeEgYqPj5dUFZi8++1BPbhoi44WuWUY0ozzeug3406V3azQmu2HwjntkPP9nFcE/fmOBu2S4/XGrSP0izc26MPMHN3x7w3af7xEP83oyRXLAAAAYFl0fssDIlgkbxDrcMZ5A6KGzDOSX1uwmsNrqMnJr+1gfol+//ZmfbTtsCTp1A5JemzKGRrS
tZUkyeVqnmVb1T/nzU2LOLv+PnWY/u+9TM37Yrce+2Cb9h8v1oOTB8phZ1EwAAAAGo6ACICk6C6/gq/KSlPzv9yjR5dsU6GrXE67oZ+P6aPbR/dSnCN8IUKoPmP1OY+nb1xcXEStsrHyXththh6YPFBdWyfq/97P1L/W7NWxIrfmXDck9BMFAABAzOCfGwFIYkPr5iKvuEw/mve1fr9wswpd5RrSNU3v/2Kkfjm2T1jDISl0n7H6bFxdUV6m2Yu/DckG36EUivdixvk99PyNwxRnt2nJ5kP6yT/XqbSM318AAAA0DAERAK/mXH7V3JVVVOrrffl6Y3221u7JUwunXfdPGqA3f3quTu2QHO7peYXqM1af80Tq5zoU8xp/Wke9NO1MJThtWpF1RLf9a6PKKipDMDsAAADEGgIiAFHJU6IT6Ipdgfo159K5b/fn6ep/fKUvd+epwpTO7ZGmZb8eqenn9ZDdVntZVVO8P3VdWSwWfkaNadSp7fTajOFKindoza7jen/rMRWUlIV7Wk2OzxEAAIA1BEQAopLb7dbjizbUWTrUnEvnilzluuftLbrsuc+19VCh4h02jemVqj4plWrfMrgt5upTrtVQniuLzVmaGXCcpphDc3d2j9aa/+PhSklwKKewTNNeXa9jRZFVVtfYmvPvOgAAQFMgIAIQtYIt0YnUEqOGKq80tW5fgeavy9bbGw/KNKVJZ3TUDcM66dR2iXLGxdfrfE3x/jiccbWO09x+RuEwOD1N/5w+TAkOmzIPntC1L6zW4YLScE+rSfE5AgAAaDgCIiAEGlraUL30pq5SqfqeM1TnQ+Op7+em0jT13VGX3tp6Quv2nVB5pakh6al6+2fn6q9TTlNinL1xJwzL6vP72ZDf5X4dkzV5YBu1T47Xd4cLdc0Lq5WdF1shEQAAABqGgAgIgYaWNrjdbs1eskWzl2wJ2VWWIvWqTfBXn8/NwQK3ps7bqE92F6q4zFRSvF3j+rXVv285U0O6tmqC2SIUgi2NlBr+u9yqhUOvzximLq1aaPfRYt34yjrll5Y3dMoAAACIEQREQIg0tLShrtKbppwLfDXGCq+T1fWz2nO0SL/877dalHlUmw8WymmThp4Sr+uGdFCfdi1lGLVvQt2UmuL9amx1bagdCo19BTbTNNW+pV3/mj5MPdq2VHZ+qRZtOarvDhfW+1yRjNWSAAAAoRXcLqYAEIM8K7wk6c4JA5t07K/3HNfLn32vDzYfUqUpGZJ6tbJrWKdEtXDa5Kjj6mTh4Ha79dzHOyQ1/fsVKp4Ntc3KShm26Pw3FM9rsNsdmj99qKa/tkHbcwo19ZWvNW/G2RqcnhbuKYaEZzXWXZOHKD6+fvtuAQAAwB8BEQDUoilXY5VXVGrplhy99Nn32rA3z9s+sncbtXWWKzUKvgM3h9VrDmeczMpKVVREb1mWwxknu92htknx+ufNwzTpuS90uLBMN774pV780Zk6t3fbcE8xJJrD5w0AACBSEBABQJi5Kyo174s9em3Nfh3IK5EkxdltunxIJ91yfk91bxWnxxdt8AssPCU2cXF8SW4qpmnK7XYrLi4uosr7apOW6NSl/Vtr69Eyrf7+mG6eu1bP3DBEFw/sGO6p1crzXkuq9f1uirLA5iIaP78AAKDpEBABqBd3RaUKS1z6OOuIKipN7Txaovc2HZJhs6u80lRFpalSV5k2HyrS/DX71Dalhdq0jFfrlnFKckoVlabsEVgeFQ77jhVrzf4iZeW6VFZxVJLUumWcpo7opptGdFO75KolQ1VffP15NjG+94qzmmzOsS5ay5qcdpv+ceNg/fatLVq6JUe3z/9aj181SFcN6xLuqdXo5BLPmt5vT2ljeZm73hcKiDXR+vkFAABNg4AIgJ+yikrllZQpr7hMr63Zr8/2FCq/tFz5pZUqKc+XJP13wyFv/+Xf5QU8z+e7CwK2x9kNvbvlsNokxattUrw6Jsdp15FCpSQ4lJVTqF4d7GoZ3/z+
eKqsNPXN/jwt35qj5ZmHlZVzwvtcz7aJunVUL10xpLMSnMFfrp4Sm6YXre95nMOm524Yqnve2qQ3v96v3/6/b1RQUqYZ5/cI99RqFOx77ekXzWWBTSVaP78AAKDxNb9vYADqzTRNfZ9brG8OlWhvnlvzvv5C5ZVVpRrvZx7x69/CaVOvdkmKcxg6eLxE3dq2lNNhl8NmyG6zyaZK7cg5oT4dU5RfWq5jRW7vrdKU3BWm9h4r0d5jJX7nXvy/8dq0jFN660Slt05U19YtdEpynPbnu7TveIm6t3M27hsSpOolXjWVa5S4K/TZjlwtz8zRR9sOK7fwh9VAdpuhjkkOndahhZ664Sy1aJHQVFMPOc97UVXiY1QrD4qdkp9oKN9x2G16fMoZSm3h1Muf7dJD72Uqv6RMvxrbJ2Ln3FA/fCYDl6hFw88LAACgKREQATGq0jR1uKhCB064dfHTX2jPSWGNw2YorYVDw7qmKud4kZLjpJR4m9q3SlFifJx+MbaPJGn2ki1+5R8ulytge0lJqf66NEslZZWaOKiTTrhNHTxepAVffa/iSptOuCpVVmEqr6RMR4vcOlrk1sZ9eT7zen/r57LbDJ2SmqDKigoddWeqR7tkdWnVQl3/Fyi1adk0X/jcbrfmLM30lmuYpqnDJ1z6LqdQ23NO6PMdufpsR65c5ZXeY5LjHcro205j+3fQOd1T9Y/lm2XYbLJFedldRXmZ5nywSUmprWW3O+R2u/XEexuj9kpgDREt5Ts2m6HfT+yvtBZOPfHhds356Dvll5Tpj5cOiPrPYXXVr+YWqEQtWn5eAAAATYWACLDA8y/QkbY5avV/Oa8+r7KKSu09XqLvjxRrf0G5XBU/POe0G+rQ0qFureL1+4n99e63OTIMQz8Z2VVzlmZ6SzfiHYG/8Ff/1/jqczDNqjHi4+NlsxlKcNqV4LTrzG5pio+Pl8vl0v5DR+SMT5Dd7tAvxvaRy7Rp37Hi/91KtO94sXbnFurbfXkqLjflLq/U/uNVgdab67P95pIYZ1d6q0Slt26hTqnxysopUUunlBxnU7zDkL2iUnYLP69K01RhSZk+2XFMm4+4de/CLdp1tEQ7cgp1wuVf4tKlVQuNObWtxp12iob3aKO4/72HNe0tdPJ7Gmmfr5qcXLricMbFXMlPoPKdk383IoFhGLrjwj5KaeHU/Yu2aN4Xu1VQUqbHrzpDDnvoQr1wr9LxXM2ttucBAABQhYAIsMCziWqkbY7q+Zfzqkt1V6ikrFJbc93KOlogV/kPQUO83VCXFIe6t26hTskO2VUhhzNOndMS6v1lrvq/xvvPobxeGymnJDg1sFOqBnZK9bZ5ViX96uIBKiiTdubk68WV3+m0rm2Une/WvuPF2n+sWAcLSlXsrlBWzgmfPX58nZChqlDs/e2fKjnBqaR4hxLj7Mo+VqTv87+Rq0IqKi3T3mPFKq+o1Nvf5qi4rEInSsrkydXmr/tfOLX3h3FshtS9TUv1ap+kwelpGtu/g7qlOfWXdzfq7K4p3nAo2Pc0Ej9fqJ+TfzciybRzuyu1hVO/+X/f6K0NB3Sk0KVnrh+itMTQBCes0gEAAIgeBESARZG6OarDGae8Yrc27CvRd8eq9v6RpOR4u9JT7Oqa6lT7lnbZDEMJiUkyKyvlKi22PObJj83KypCWGdlshjqkxCst3tCqdon6+eievuVt5RXKzivV3v+tQNp95ISWbzmoE64KFbor5a4wZapqPY67wtTBfJcO5vuu5Pn+WKnfuMdLfH++NkPq1TZRlZWVuuT0TurbKVV92iere9tExTt8N5l2uVwNXqkQqZ8v1E8kr1S5fEhnJcU7dMe/N+jT73I1+dnP9eKPzlTfjskhOX8kv3YAAAD8gIAIaIZyi8uVue+Ydh0t8RYmtU20a1h6qrqmxanM5b85dFM7uQStPsd47gcS77CrR9uW6t4m8X/lWa1klp7wrsAxTVOOhJZylVWo1F2m687tI3eloROu
cuUVlmjRhv266LROSkmMl8Oo1LLNOXLYDE09p5taOG16ddU2tUhIUGJCnG4f1U3PfbxDPx/Tk9URzVSoNtuuqewzUowd0EELbj9Xt/1znfYeK9YVf/tcs68ZpPGnnRLuqdXKd3N0AAAAWEFABDQTpmnqs53HtHh7vg6e+GG1SZcUh05rH68OLe1q0bKFzMrKWs7SdDwbPP/y4gFBH1N909mZF/Su8/yByrMMw5DTbpPDkBLspk7vnOINd1wul74/eFzXn9XFuz9SVnaBJOn0zimSqlZgOeOqVl6h+fNstu2Mt3aFueoll5G6cfeATil69+fna+br6/XFzqP66fz1+sUFvfWrsadG7ObV1TdHBwAAgDUEREAzcKjApav+sVab/xdmGJL6tGuhM05pqSRbWXgnV4uGlJ7UtelsoPNTngUrQlUi5Sm5jOTPY6uWcXptxtn68+JteuXzXXr64x3KPHhCT147SMkJznBPLyBK2AAAAEIjMv8ZE0CNqpdm5Ra69NH2XC345pA2ZxeohdOmge0TNGVAsi7o01ptEhvvCx2lHXXzvEeR9j5F6rwQnIaUZ9aHw27THycN0BNXD1Kcw6blW3N0xd++0PdHChtlPAAAAEQGVhABUcbtduvRd9arY/u2embFThW6qsqnerd26sWpQzX/8x1NskIhVKU3zVn1MrdIKivi6mjRramuDDZlWBf1bp+kn/zza+04XKjLnvtcT18/RGP6tm+0MQEAABA+kfONBUCtPKsGPtueo3e2FuiRD7ar0FWhdklxunxgG43ulaY2SU1bauFwxvmtRomElUWRMAcPhzMuIktgInVekaqhq65C/Vn0nM/htLY6MNhVSIPS07TojvN0ZrdWOlFarhnz1uovS7eptCz6gkXTNFVaWqrS0tJGXYEFAAAQrVhBBESJvUcKNOPVddp5tOoS7GmJTv3mwt7KPl6kyvKysO1rcvLmu56VRYbNpnDtWOKZQ4uklDDNAM1NQzeZDvXvQ6hW7tVnFVL75AS9fusI3b9oi/791V49t2Kn3v/2oP50+ek6v09bS/NoSm63W4+89ZWc8QlBbXQPAAAQa1hBBES48opK/X3VTk14drV2Hi2VIal/+wQtveNcXXNm54i4mtbJq1EiYWVKJMwBzUtDV12F+rMYyk2zgxXnsOmRK0/X36cOU8eUBO0+WqypL6/Rr/+zUUcLXSGZT1Pw/Az58wEAAMAfK4iACLbzSKHu/O83+mZfniSpY0q8zuuWrFYJhtIaaQNq35KY0IVPpmnK7Xb7nDdU5Tc/nFuKi4vML36e1xoXFycjiFAvksrkol31cqrm9p4G+9mv7+evJuNP66jzerfRE8u269XVu/X2hgNakXVYsyb01+TT2zX4vJ45ut1uy3MEAABAw7CCCIhAlZWm5n6+S5fM+VTf7MtTcoJDj1w+QFee0UFtWzZu4VZFeZnmfLAp5JsXe0pjqp83VGN5Nl2evWSL98typKkoL9Psxd8GPb9A7xcaxlMe9pd3N+ovi75uVu9psJ/9+n7+apOc4NQDkwfq7Z+dpwGnpCivuEx3LfhWN839WsdLGl7q6il7i9TfYQAAgOaOgAiIMPuPF+vGl9bowXcz5Sqv1Mg+bbX0V6N05ZBOTfav6o1VfhHovKEsl4n0spH6zi/SX080ac6lRcG+rlC/9sHpaVr08/N03yX91cJp19o9eXrz2yN6+uOdDd7Eujn+fAAAAKIFJWZAhDBNU/9du08PvZepQle5WjjtmnVJP00d0U2GYfyvNCYy1FSqU9/25izQaw5VmU9951G9BInSnehT/XMTaRx2m24d1VMTTu+o37+9SSu35+q5Vbv03/XZmnZON101pGOjjBuLf6Y0RDSU3wIAgMjBCiIgAhS5K3T769/orgXfqtBVrmHdWmnJL0fqpnO6R+QX+ppKw+rb3pzVVFIXqjKf+swj0svvULtoKL3q0ipRf79hkC7qk6aOKfE6csKlvy7brtGzP9On
u/K1K7copONRghkcfv8BAEB9sIIICLMdR4q0ascxlZZXKs5u068vOlW3jeopuy3ygqHqaioFqW97c9aYJXVW54HoEg0/Q8Mw1LNNCz15/VAtzzqmFz/9XluyC5SZU6wJz67Whf3a68cje2p4j9YhCb4dzjhVVDR8z6NYEQ2fHQAAEBkIiIBGVFtJUZGrXH94e4uWbsuVJPXrmKSnrhuifh1T6nXexhSuMg4rVzPyvVpaw8cvLS31vnbTlEpLSz3PevtEU4lLU5YpRdt7E0rBlBFGyvvTWCWPTrtNlw/prMsGd9KnWYf0h3e2aM9xl5ZvPazlWw/r9M6p+vHIHrrk9FPktDf+QmbP7/P/HjX6eAAAANGKEjOgEdVUGrL5QL4ufeYzvbXxoCRpWHqK/t+tZwcVDklNV6oUrtIwKyU1oSg9cbvdeuStr7xXvKooL9NfFn3tcwWsaCuba8rytmh7b0IpmPc5Ut6fxv5MGIah4T1aa3zf1lpyxzm6cXhXxTts2nQgX798Y6PO/NNy/ea/3+jDzJwGb2odjEC/vwAAAPDHCiKgkVVf3l9Zaerlz3bp8aXbVFZhqmNKvEZ0S1PntATFOWrOawOtODi5bKCxViU0RnlCMHP1jOvpW1N/3xVDhvdYq6UnDmecDJvN53FNc/SdR+2bwYZ6JU99zteUpSaxXNYSjiuKBeOH37uq+5Jkdzi9bY25iXmPNon64yWn6s6LTtXra/bqtS/36MgJlxas368F6/crMc6uUX3aqMzlVqGrXPHx8SEdP5Y/jwieldWrACBxcRBEP1YQAU3k8IlSTZv7lR5evFVlFaYuHthB79w+Qp3TEuo8NphVMZGyKiEY9Znryat5Aj0fCZvVBrsZbKhXbYRj42tEp4ryMj394Tafz2igtsbgWRWY5JTuuLCPvrz3Qr1x2wjdfG53nZKaoGJ3hT7Yclgf7cjTiMdW6ZZ5a7VgQ7ZK3JH/5xmaj2jYEB5AZOPiAIh2rCACmsCq73I1a2GmcgvdinfY9MdJA3TD2V3r9RdHpK5KaKj6zPXk1TxWztWYgp1HqOcbKa8fkc/hjJPd7qizrbHG9rDbDI3o2UYjerbR/ZMG6Nv9+Xr/mwP6z7p9yi+t0EfbDuujbYclSW1bxslVKY3u11Fndm+lBKe90eeK2MWfpwCs4s8RRDMCIqARucsr9cXuAr3wZdVeQ/06Juvp64eoT/ukk8qmflh+GuzS1EjZ6LaxVC+Hqek1NsZ7UN9znlzi1ZSbQdc1r+obbSO0atvgOVApl9VxTv4ZBvqchWKD9nAwDEOD0tPUr30LqaxUk4Z104dbj+jDrYe19VChcovceunzPXrp8z2Kd9h0do/WOqdHKx0tLldqnGlp+X5jbdQNAAAQjSgxAxrJ8eIyTX11ozYdKpIkTTunmxbOPE+ndkiutWyqPqVK0VJS1hCe0pc5SzNrfI2N8R7U95wnl3hFSslXXaV5sKa2UpRQlm3V9HkM9DmLlHJLKwzDULe0OBUV5Os/M4Zo+vAuuqB3mnq1cqpdcpxc5ZX69LtcPb7sOy3cmq//bDmhT/YUK+twkYoaUI4WKb+vAAAAkYAVRECImaaprTlF+nxXvsorTSU4DD15zRmacEYXn361lU2Fq1Qp0jiccTIrK2vdcLox3oP6nvPk/pHyc6mrNA/W1PZzDmXZVk3j1LRxutUN2iOB57UlxtnVp20L9Wzl1G8nnqG9+WX69LtcrcrK0Rc7j6q03NT3x8v0/fE8SVJagk2dkh3qlOxQh5YO1b3DW+T8vgIAAIQbAREQQq5yU6v3l2h3XpkkaXj3NPVpHacL+rZr0Pk85Q9Wy1RiRUNLzjylOdFajhXs6/aUnf3vUaPPK9pFehln9fmZpkIy1/pcxSnYvqF6Hw3DUJ/2SeqWFqerz2ij2Us262CBWwcLy3WwsFJHisqUV1qpvFK3Mo+4ZTOkjsml6pwar44tTLVuYQuiZDf6rzpT/ecCAABQHwREQIjk
FJbrkz3FKiozZTOk4d3S9Pz1p+n5FTsbfE632605SzP1y4sHhHCmzZenHKe+KwI8ZX3lZe6oLM/xvO6k1NZ19vvLoq8lqc6+aPjnqalU/7mblZUhmaundO6uyUPqvNR8sH0980xITLI0N8+Ynt9VmZU6JdmhU5IdSkhMUomrXHuOnlD2iTIdKChXUZmp7AK3sguqysdaOg11TXWqdwenurSy+533uY93SJLunDCwztceyar/XAAAAOqDgAjNXn3+RbwhKk1T6/af0Pr9RTIlJcfZNLZvG3VKS5QtBONF6pfTxmJ1o92Gvl+e46K1PIeyxMYR7PsVaJVMKFcg1bRpe/X5hWKucXFx9b7CYCj71TbHk8918u9qgtOm7mlOdU9zyjRNnXBX6nCpXfvzSrU/36WiMlNbc93ampureIdN+/NLVVzsVnpqfMA5BnvBgKYU7N9n4f49b+y/dyNlTAAAmhs2p0CzV9tmslZl55Vq4bc5+np/oUxJvVo5Nblvkjok8yW8oZrDRruIPYE+t6HcRD2YTdvrc66T5xVpmzVbfe8Mw1BKvF2nnZKki/u21nWnpejCHonq3dqpBIdNrvJKvbvpsD7aeUL/3JirX76ZqW05hXKXV3rPEewFA5pSY/59FkrhmGe0vDcAAEQyVhAhJjTGv6Yu3nRQ9yz4VgWl5XLaDY3onKBerQmGQqG5bLSL2BLocxvKP3uC2bS9PucKpi2cQvre2QylpzqVnupUXIuWOlxYoaQEuxZ+c0iF7kqt2H5UkrTSOKq9+S5dMbSLzu2eGnHviRR5P6eahGOe0fLeAAAQqQiIgBrUtFw9v6RMD767RW+tPyBJap8cpwt6pSpBgf/VsnrpBsveIfmXC/EZQSQJxecxkjd+txmGOqXGa/rwjoqrKNFxl9S+VbL+83W28krKtTTzsJZmHlZSvF1d0hJ0aruWKq+oVGPtStRUpVGecUyzkj9vAABAQJSYATUItFz9k+1HdPGTn+it9QdkGNLp7eJ02cC2Sk2oOWuNtNINhN/J5UJ8RhBJQvF59JRnhaIkrjF4StgqKyvVJtGhmaO66YZhnXTNkFN0y3nddEpqggpdFdqWU6RFmw9r1BOf6YFFW7R+7/GQX1WyqUqjPGWQblcpf94AAICAWEEE1MKzXL3IVa4/L96qf63ZK0nq3iZRj1w+QMs37pLdZsisrP0LA8vecbKTy4X4jCCShOLzGOkbv5/8Gg3DULukOP1ibB/dN3GgVu/I0Z8XZ2lnbrGOFrk174vdmvfFbqW3bqHJgzpp8qDO6tsxuVHm0lg8ZZD8eQMAAAIhIALqsG7Pcd27cKv2HiuWJN18bnfdNb6v7GaFlm+s+biTr8JTvawo1P8CHWtCfXUoK1dNa6z5WPmMhHI+QChV/3MwkkucbDZDZ3ZrpdF9/n97dx7fRJn/Afwzk0zSJr1bKBR6AOUsl1BFBSlyiMglrsJ6ciiigAcsrjes/NxdWcQVBdZjBUTdVcELFAW50UUsUI5SoBRaWkpL6X3nmuf3R0jIZCZpetGk/b5fr7wgT5555pnnmaSTb57nmXDc1i0Mg+LC8GPaFWw7mY+c4hqs3n0Oq3efQ88OgZjQNxJ3D+qM6DB9S1ebEEIIIaRRKEBEiAu1JgsOXCjHBwfzwBjQKcQfy+/tj1vjIwAABoP7aRO2KQxqQQM1r4LRaMTqXRkAgHkj45u9/q2ZY9s2lm3ahX9AUKPrExAc1iT1+cfmFDwztk+j60OjBIi3cfwcXDguAVptc63s03RUPIekHhG4o18n1Bgt2HHqMjYfu4Q9ZwpwJr8CZ/IrsGJHBgbHhmLSgCiM798REQHef1yEEEIIIc4oQESIgmM5pVjwRQrOF1pHDU1LjMYrE3oj0E+oVznOX9DpC3vTaYq2tI1mUAv169fmqI/jyApvqA+pm63PvHnEVlOMkGtqzuemNy9o7cxfo8LEAVGYOCAKZdUmbDmagzV7ziGvwojDF0pw+EIJXtty
EkO6hGNcvw4Ym9ABkUF+TVoHb1vU/notsk0IIaRh6HOa1AcFiAhxUGUwY+XOs/jol0xYRAadwOOfU/tjbL9OLV010gwcR9o0PiTT+Lq88/NpMFEEx9P9A3yBY58Zaqu9MihnGyHnzeeUbUFrs8no1fV0FqwTcN/gTsgpKMWDQ+Px85kibD6ai2MXy3DgfBEOnC/C4u9OYlBMCMb17Yg7+3ZAdJiu0fu1LSL+4pQbvWIElm3U458n3eAV9SGEECJFn9OkPihARAiskfWfUvPx2pY05JfXAgDG941EhBYY0SOihWtHmpM3fal3XriaeD9f6DPbwsTezNsXtK5LZJAWjw7rgkeHdUFOcTV+Ss3HTyfzcfhCCY5kl+JIdin+uvUU+nYKwp0JHXBn347o1k7f4F9yvelzC/C++hBCCJGiz2niKQoQkTZFaYhlVmEVlmw+ib3pVwAAMWE6vDYpAbd2CcaKralNPpTf1XQUxhhqa2sdXuPc5m+N2tKxukPtQLzRtelqgCAILs/R+kxr84Vz3fbZzBiDVqut829BdJgOs4d3xezhXXG5vBbbTubjxxP5OJhZhNTccqTmluPN7enoFOKP4T0iMCQuBLVmsckX8HbsB8Zw3dvZdjyCIMBkMgGA/f/ePM2hof1AUziaBrUjcUTnAyHXn++M5SakCdiGWBqNRtSaLHh7RzrueHsf9qZfgUbF4+lR3bF9wXDc3qs9gGtD+W1fipqCbVqTxSJd5NpoNOLvX/+O5ZsPS15zlb81akvH6g61A/FGtqlgb/14EpWVlVjx/VHFc9Q2rc2T87c+eVuKxWzC8s2H8ca3h+r9tyAyyA+P3BKH/z5+M5JfHo1lf+iHET3bQVBxyC2twX9/z8GzX57Ax4cu4973D+KP7x/Aoi+OoLK6ttH1dmzblvhMsf39rKyslJw3tr/B3so2dfStH0/Wq56O1xek4agdiSM6Hwi5/mgEEWlz1IIGv2QU4f+2nkFWkXUR6mHxEVg6OQFd2wUo5m+OOrhKV1qDoy0NC21Lx+oOtQPxRo7npbtztD7nry+c62pB0+h6hgdoMe3GGEy7MQbVRjMOZhZjf3oh9p8twNmCKqTmVQIAUi5VYWfGXgzpEobEuDAM6ByC/tHBCKrnTRJs9Vb6//Vi22dL16O+1IIGKlX9L5F94dh8AbUjcUTnAyHXFwWISJuSW1qDn9NL8P5veQCA9oFaLJ7YB+P7dfS6oau+MPWCkLaoLbw3HafZ2KYFMda445W2m/LnbV1TcK8+a1Q93O2TMaC2thaiKMqmyTXl3cN0GjVG9GiHW+OC8ecxXfH65hPoGRWCTw7mIqe0BtVGC3afuYLdZ6xTnzkO6NYuAP2iAnG5uBbtAwSYLCJ8ealRxymLNH2EEOLrbJ9pjf1b2dj9N/TzlKbzERsKEJE2odpowetbz+DzQxdhsjDwHNC7nR82PHozwoP1LV09RY532CKEeA9vuvtdc7FNs1Gp1Jg3Mh4rt6XhmbF9Gl3myp9OICA4zOXoDFefe7ZpXgAQEBzWqHq4qxcTRft+nPfV1HcPs02deGZsH+g1KkzuH4n8MgMYYxjbryOSs8txNKcUxy6WIqe4BhkFlcgoqLRv/+Pf9qBvp2D06hCIHpGB6N4+AN0jAxEo+Ebg0jZlEQAWjkugO+sQQnya42d6S+6/oXcqozudERsKEJFWrazahDW7M/Dfo1dgFq0XzZ2CNPjnfQnYeTIfAX7e/Rag4BAh3qktvDcdp9k01fF6Uo67KbjNpTmmztV3vzYcx6FXh0AMiL12B83CSgOO5ZTiUGYRvku5iMJqMwxmEYcvlODwhRLJ9iH+AvzUHIoNDDmFNWgXxCFIw0PFGHgv+1W4LbyPCCFtR0t/pjV2/y1df+IdvPvbMSENVGUwY/3/svDe3nOoqLXeNnlA5yAsGBWPA2fy0CsyADtPelaW4x1sOI5r0NDLtjAlhVxD/U3aEtv5Xp9z3nG6VltjG8bvrr2cp7NF
BGgxqnckhnUNgbGqHOA43HtLd5wuqEb65UqcvVyJswUVyC6uRmmN9Y5hG1Pyr5ZmXWuP54AADY9ADY9gfwPW/e8CurQPQkyYDjFhOui1nl0SNsU0hsb2va9NhfC1+hJC6oeu+0hrQgEi0qoYzBb852A2Vu/OQGGldW2DHu316BKiwcoHBoHjOBw4k1evMm1TG/x0AVCp1A0aCu84hYG0fm1hChIhNkajEat3ZcBsMiousq/ENl1r4V39m7l23sc2tcpdexmNRqzcluZyqD/HcegaoUfvTtK/KbUmC07lFuPdHeno2j4Q20/mo8wgosJggciAcoOIcoOI3Aoz0radlWwbohPQMdgfHYP9HB5Xn4dY//UTVI2ehtAUfe9rUyF8rb6EkPqx3TVS0Pq1dFUIaTQKEJFWoaLWhE2HL+LDfedxqcy6kGlsuA4Lx/TAHT3D8fa2tEb9ame7gw3Pqxr8yycN22xbqL9JS2vKXzTrWmDadr5bLGaPy2wt7xHn0T6uRos4LmDqSXs5t490Me1ri4g77sdPUKFPxyDEh/th9i0dwQxVELT+sFgsKKmoQoVBRIVRRJUZ6BQehNzSWmQXV6Ok2oTSq49TeeUu6xSqExAZpEV1rQlVW04hOjwAEXo1cssMuFBUjegIFXhmqfO8q6vvPRlxo1ILkjZojOuxuGxjznfH9gBAo5FIq6P0nve1UTmt5W8aIRQgIj7t/JVKbDhwARsP5aDKaAEAdAjyw9OjuuO+xM4QVLz9ArIpOC5SSggh3qwpF7r3ZIHptsp58WpXo0Uau4Cp7Rdq66gji30RcaVRrc6jGHmOQ4CGR4CGR0dYv8g8N6GffbvyWhMuFJRjza4zqDKI6BcThoJKE/LLa3GptAZ5ZbWoNlpQUm1CSbV1Clv2oVzJPr8/9T8AgFbNwV/Nw1/NQa9VwbIrE2cLqqDTqHAouwxltRbote4DG56MuHFcSH3huIT6N6jC/lpqcdm6OLYHABqNRFodpfc8jcohpGXQVR7xOaLIsPfsFaz/NQt706/Y0+PbB2D6LbG4LzEafoKq2fZPvxAQQnxFU35e0Wefa85t01wLbasFjX3UkeMi4o3dV5CfgB6RAejaLggA8PTIbpLgA2MM5bVm5JXVILuwAp/8mmkPIl0qqUbapTIYLAw1JhEGM4PBbEEpAFSakVF00V7O9tOF9v9vTP0fBBUHvaBCZkktOgTr0D5Ii/aBWoT68Sg3cSiqNCJS0EDFKweU6mqD+vD289vThdQJ8VVK5zWd64RcfxQgIs2qKRdmtE0j23DgAjILqwAAHAeM6tUeM27tgqHx4bJhqbb9N/Xw1OYqlxBCvInSEP+mGvbva9MHAPl0Mk/zAlA8VndtUJ/tm2u6kePf8GB/AcH+ArqEanE44zKeGdkNGo0GFRUVWLUzHfNGdketyOOfP59FRY0B5VU1qBV59OwYjN8yS1FttEBQ87hUWgOTCFRfHfVbVmPGpdTLivvfeHwfOA4I02kQHqBBmF6DUH8BYXoBp/KqEeBngU4r4FBWCQI11nUIbdPE3E9ZAbRabYPbrCmubepbhmPdfek901KuXaehwTcXacmpfI3Zf0vXva1wPMeac3pqU3E1NVkpD507bRsFiEizauzCjCaLiF8zCvHD8TxsPZFnn0YW6KdGTJAab98/CN07hiru9+9f/27/pdVPF9C4A1Eo3zbUnxYhJoS0VkrT1BzT1HzDR2v64mLuztPJ3HFcaBqA4lQJWxso/Y1yntJ8bXpZw+rTEHX9DXf8W/jPH09g4V39Ea7XIFgDGLQWqAUNnh7TDe/vzwYAzLktBiu3pcHMONx7UxzW/s86PXxwXBiKq80oqDCgoKIWl8tqcbGkGgYLA2NAUZURRVVGhRrWAAB+OnVtNPFnKbsQptcgTCegvNqAQXHhiAj0Q6hOgwANh70nLkArqDB3TF9EhuoRqtMorKrVuHZpjjJsU+qYKHq8GHxbZlsMHkCDbi7S0guLN2b/
LV33tsLxHJs3Mr6Fa1M352m5SudGc/9NIb6hVQSIqqursX79enz77bc4deoUrly5goCAAHTu3Bljx47FzJkz0atXL6/e586dO/Gf//wH+/fvR35+PiwWCyIjIzFkyBBMnToVU6ZM8bisnJwcrF27Flu3bkVWVhZKS0sRHh6O+Ph4TJw4EY8++ijCwq7f3bTqOzzUMSi0Pe0yyq7eshe4Oo3s1jiM7xOB93edQUyYzu1+m/MiynGoPyGEtFbNOezfF6cP1KfOnkwLclee8/ZKf3Oauw3rKt9Wr/rUQ1BxiA3zR1SwNWA245YYyZcRg8GAt348iafv6I1qC4+iKgOKKo3IL63CV8nZSOgUjH3pBTCIHGpNDIKaQ36ZASaRwWRhuFxuwOVy6y/lucfzFeuw/dxB+//9BR4qDtibWY7yaiP8NdXw16gAnkdEkD/C9BqE6DQI1QkI1WmgUzOo1I0Pa9a379SCBkwU6drDQ00xpbMlNWb/LV33tsLX2tmTabm+dkyk6fl8gGjPnj2YPn06srOzJekGgwFFRUU4duwY/vnPf+L555/H0qVLm2S4XFPus7S0FNOnT8fmzZtlr2VmZiIzMxOff/45hg0bhv/+97/o3Lmz27q9++67ePHFF1FVVSVJz8vLQ15eHvbv349//OMfeO+99/CHP/yhHkfdvGxBoa0n8rDtpDQoFBGgxbi+HTC+f0cM6RIGjuMkC083fJh2w4aDNse0CF+cakEIIW2Zq7uKOU41cJ4W5Oqzvq67xHlSl9pa6x08PZn+7DgNG7BON6hr/453+lKa7tSYv2OO9XcMFKlVPNrptGgXaE0zGAKRfrEI80bEQTRUQ9D6QaVS20cmWcBhRlIvVBiBvNJK/OdAJm7q1h4VRhElVUYUVdbieE4JDGYGlYpHWY0ZZtG6dhIAnL5su3ayXoOk5lW6rDPPAd+cLEaoTkCIToMQfzVyiqrA7chARJC/PaCkVwN6tXVdJlF03TZKdyoDAEEQmvT6oD7T7K7XVCWlqTqeTIdpjv0rTUm01ak+0xLr03bN0c7upgq5O97GTMvzpE6tceqbt1/DN/az2fnzorX2I7nGpwNEu3fvxvjx41FTU+M2n8lkwuuvv46SkhKsWrXKa/ZZUVGBMWPG4NChQ3Xu95dffsHtt9+O/fv3o0OHDop53njjDbz44ot1llVYWIhp06bhs88+w7Rp0+rM3xwYYzh3pRK/nS/Gwcxi7Eu/ohgUuqtfR9zUJczlApVAw4ZpuxrS74mmvDNQc5ZJCCGk+bi6q5jjVAPHaUEWiwWAxX43OEeNvUuc491+ru3LNWm9zFh4V3+s+P4o/AOCXO7f8U5f8uOSHkND68/xfKPuEqrmOXQM9kOcVose7fxwOOMyHh0aa782MBgM+MfmFPsInBfuToQRKhSUVGL1zjMYndABXx68ABNUqDVZUFFdAxPj0SE0AKU1ZpRWm1BcbYTRLEJkwJVKI65UGgFc+1HuVEGWy/ptOLwToToNgnUCgv3UKKqoRX5NKsIC/BCg4XEoIx/33NQFwf4CtqbkwE/NY+7t3fDhzlSoVKommYppm4Jva2t3103Xa6qS0lQdo9GI1bsyADRsilhD9++4L+f3SX2m3dSn7Zqjnd1NFXJ1vI2dlleX1jr1rammXTeXxiyLofR50Vr7kVzjswGi8vJyPPTQQ7JATc+ePZGUlITs7Gxs374doijaX1u9ejXuuOMOTJo0ySv2+dJLL8mCQ3q9HpMmTYJWq8WWLVtQVFRkfy0jIwNz587F119/LSsrOTkZL7/8six9+PDh6N27N5KTk3HkyBF7usViwezZszF06NA6RyU1BVFkOJNfgd/OF+LAuUIkXyhFUaV0PYGIAC3u7BuJO3pFIDE2FP5+yr/UKEXCbYEVW1S7rki5YyDGto0gCPZfTur6wGuOQA4FhwghLcnbfwX1Fo6/qKoFwR4gcTV033laUF1TzerbD7b8akGo1xQkW17bVGznv4u2
kROOx6tSq+11U9qX87G5GhklHX0Fe/3B8Q4jsUTFERDO7VNXe7kaXaUWrOUGaQVow3RopxdwU2c9jmRoIGj9wUQRhloV1IIGz00YAK1Wa99XtdGCN7eexLi+7bHxaAFqTQyDY4Ox/eRl9OwYhEoTQ0m1CcVVBlworEKtWYRZBESF9ZSyS6VT4A7knJQ8//yYdX0ljQrwU1dCq+ahUQGXak4gRKeBn5pHsE6DQD8BAX5qBGhV0PIMeo0agX5qhAb6I0CrhgARfn7aa8fu4RT8prw2cTdCRS1oHM478do5gfovMK503jmPenC89nPcl/OoIdt5jquj1+szakKlFiSjoAC4HHnR0HZWGnlmO3bb/l3tz/beYIzZr3uVro+bcqSIuzp5ukCyY9+ZTNYfl23/d0xzNTrKdr3PGAPHceA4TnZeOG9fF3f956odXS2k70neut4PjiM+bed3XT8cuDs2588LpfMEcL/4dX1ne9AopZbjswGiFStW4NKlS5K00aNHY+vWrRAEa3x03bp1mDVrliTPwoULMXHixAadbE25z3PnzmHNmjWSfDqdDgcPHkRCQgIA4NKlS0hMTEReXp49zzfffIP9+/fjtttuk2y7aNEiSWAKAJYsWYK//OUvAKxvtJkzZ+Ljjz+2v15RUYElS5bgo48+qk8zeMxoFrHht2xsTy/BF8f3obTaJHldq+YxKCYUN3cNxy3dwjE4NhRmk/XXi19P57n89cLxl1Kl19768STMJqPHH4SOv4o2xS+YhBDii2gko2eURs80dfn1+TW6ORb7ti2wLR8tZK7XvlyNjHJMZ6LocN5ZJPtSGgHhfLx1tZeno6s8aUej0YjlW47imbF94K8SsS3lPKKuHtv9iVEoKCqFxVyF5fdY6+w4YskiMpg5NWpNFtQYzRg7IAbfplzCTd3aocrEUFRZi+TzRYgM0aGsxoSc4mrUmkWYLNZAhdECGC0WwGCtf66LO7+5o9eqoNeoYTCZoVFxSCs6giB/Dfw1KmjVvDX4pOahVVufqziG1PwqfHkoF3p/jeQ1SV6Bh0Zlfc5zHFQ8B54DeJ6D6upzjgPMV88rjuMUr/Ec+8D5vKnvyCfnEXHOox6cr/1seR3Pfee61XfxXudFgQE0+cgLx+MC4HQNbHFbZ4vZhOWbD0MtaBSve6/3yCZP2pgxhsrqWvxjy1HMHtEDH+w7D1FkmDYkBuv3Z2DakC747+85EEWGuxNjAV4Fo1lEtcGILSk5sIjAsO4R+PnERZgtFvBqDRg43BAXAREcaowmHL1QDAtj6NY+CBbGwSKKsDBY/xUZRBEwiyLMFhF5ZbUQRREWUQRjADgOP51NRlmNCYwBX6bkwcIYqmrN8NOowHMc+KtBKY6zpgfrBPAcD563hjNLqwwID/SDiufBAeA5DgDDlfIadAzRQa3ioeY5XCquAM9x6N0pFH4aNdQccDqvFJU/nIZWzePohWKoIAIWE7QaNdQ8B52hFlpBjZOXyhEc4A+dRgWdRg2dpuEjnzwZeVbfc4lGKbUsnwwQMcawfv16Wfobb7xhD9QAwMyZM7FixQqcPHnt15hz585h7969GDFiRIvuc/369bKAzqxZs+zBIQCIiorCwoUL8dxzz0nyffTRR5IA0blz57Bv3z5JnrCwMMmIIo7jsGzZMnzyySeS/X7xxRdYuXIlAgKa9i5fgHURyvf2Zdl/KfMXVLghOhgmoxHP3tELiV0joFVLP5DM8OxXFE8W9azPIo62bZp7YWtCCPFmFBzyTHMvFtyQxYubsw6Ox9tUdXO1gLcn+3JO92QhbU/6rL7XH/VZwF3Fc9Dr/BF0tQ6jekbgdG4pHhsWZw8mvfXjSXsg4Z0dZwEAs27tjBU/pqHGaAIT/FBrtAaYhvbsiAqDGXtPX0bvqBBUmxkqa00orzEh80oFTCJgsjCYResDAKoMFlQZrgXIrpwvrvN4AeDXrHKP8tXHv3/Ph4rjwPPWL8Bmi2gfY8ZxteAAcHwe
OHD4+ng+OHAorzGC44Bd2b/av2iDs25v+yJt/S2WoaCsBjxfC47jsO98CTgA+WU1OPLvZKh4HowxXCqpxrErx5FbUgOeN4LjOBy5WIac4hqcKU1F1pVKqFRVABhEiwUqFY+8L45DUNu+6FuDYDzH2Y+Fu/p/JlqQklMFlUoFnldh+faz4DkOR/INWLnrHAS12h5IY6KIo5cq8dGvF6AR1OCvbn8yvwr/Tb4IrUaw7uvqMdvajOc4iBYzsiss2HGqADzP4WKFGaJFhNloAscBvErALxlFUAuCfVSU0WRCVnENRLMJRiMHlZph95krYAAyi6oBADtOFwAMyKkQsT2tALxKDfPVAIntYXb4v+25yBjMFnY1qHI1j4XBwhgMRjMOXagEY8CCjSdgYdZz1GgWYTCZkVVYCZEBv64+ALMIGEwWlFQZIDLgP8d2w2S5FjAFgE+O/mb//3+OWkfafZN21J625ZTy+b0/s8zhmfXzIOVSlSxfRmGtx+ezo7Laa++xSuO1/xtqlD97qssMsrRyQ7Vi3uKaClla7ukrkuenCy4qbGn7kd5a7jfH5QFmQWV9Hwk8h52Z/4Neq4a/mkd+aRU0Kh5V359GsE4LnRpIza/Ct8fyEBbgDy0vosTAoFHxKK4yIkylln2/A7zj7xrxjE8GiFJTU2ULRIeGhmLw4MGyvKNHj5YEawDg+++/r3eAqKn3uXXrVtl2Y8aMUSzL2Q8//CB5rlRWUlKSJHAFAJGRkejXrx+OHTtmT6uqqsKePXswYcIEWRmNxXEcHrk5Gr+mF2DBHT0xuEs7MIsJb/14EomxIYofHjaOw0wBSIYuutvG1eKc7hfUvDaU3XmxUZpqQQghrtV3uk9b0VTtcD3+Hnm2cHbz7ceTbWzTRura3nF6UFPldb4eUcrvyYLlSmUqvW8cr2UYA0SzCf4CBw2vgp9Oaw9yPXJztHU6UXUV5o/qCo1GY9/2ne1pELT+4HkV5twWA/BqvPnjSUy7KRaVRgs2/JIBE+NwR79oVNSaUWu2WL+km0VYwMNwdZRTjdGEk7lliAnXwyRaR4abRMBgtsBgssBgFmE0izBarItwm8zWQIAnLCKDBQxwOwjvamDLKM1UYXC/DqjDXgDg6jpRVgWVZZIceRW216xf3i+WWoMCF+1f2qWj37NKCjzct9TR3GtBtpRc5cXPD2bLv/z/4mFw7uf0Upev/ZSeUuf2288ekzzfmnYt6LA9vcSjOtTH2SLXI+CKqhUCKS5G/nGwBl/9BR4mC0Owvxo1JhEqDogM9oNWrYKg4sFDRH65ATzHoUd7Pc4VlIMDg0bQQMVzGBQTjAB/PzCLCUdyyqDiOAzvHgY/jQZ+WgFgIrSCNagnWizgOUC0mLH9VCGYxQKzyQAOgFqtxj2DovDjqWJw4HDfoA7geQ5f/J6Nh26Og1oQIDIGtSDAaDTh4/0ZmHpjNFRqa7rIgP8eOI8pgzpDLQjgeRVMZjNEkWHT71mYMDAKnEoNo1nEN8mZsDBgRK8OEDkeRrOI3afyrTMyGIeDmcUwmiwwmEwwi7AG68DDLDJo1DxqTSKqjRZ7APnaaEWGzEJ5gOpcsTTw9GvWSVmeTw9ZZ9po1DwCtWoE+FmnuuoEFS6XViO38hiC/LUI9FMjWKeBv5pHiF6LIH8BAbb8WjVgqoXFYrluC9UraezUOV+eJueTASLHtXRsevbsqZi3d+/esrSUlLo/KJtznyaTCampqR6V16tXL1laYWEhcnJyEB0d3aC6OQaIbHVrjgARADwxvAuqq6oxKCYEGjUPg4ej8R2HmQKQDF10xXFIsVJ5SgtqKg2blw5xb/rpA4QQ0lo4L0zcHNOdfFFTtUNzT2ez7UOprk3dlw2Zwmi7Flh4V3+s3pVR5/Rxx/Yy1Fa7vRmFc15X9XK+HlE6Dmma8oLlzmU63izDcRqc47WMdPqdlOPCs47HAUAy/e6fP57Awrv6Q8OJ+PbwBTBRRHsdD7WgwdheYU7T78yS
665AWBAe4wezqcZeru31v311EBYLg1rwd0ozQ6UW8My4/lizKwMmkxFmsxlzxyTg3Z9OQKsPgGgRUVVZDpVawKwRvfDhzjRodAEQRRGG2hp76E3j5w9RZKipqgADoFKpoVILeGBoPNRqAQajCRv2nATH87h/WE8IagEiA4wmI774XwZUggYcr8K4hHb4+vfzUKkFjB/QCVtP5MNsNoHjeNzZLwpbj10Er7Z+Gb+9Zzh+Op4Lk8kAnldD0GghMgaT0QhepUJSz0jsTb8Cs9kMk8n65ZHj1RjaswMOnCsEY0BiXBj+l54PcBxUauvaWjfEhMAiMiSfvwJRFHFDl/bgeB6iyGAyW3A8uwgM1i+VZrP1HOdVKnRtF4hzV6wja7q0CwA4/moggcFkFnHhSjnAcYgK1UNkDHmltRCZCPHqlCeO4xAZ7A8AKCirQYcQnfX/FQaAMYiiCJ635skvqQKvUoHjOHQM9gfHAbnFVdaRSByPmHAdLhZXIT4y6OoIKEDN81DxHNTWoVBIu1QKHhxuiLPeYOZI5hXcHB8JrUYNJoo4eDYfKp7H7b0j8b+MQkC0QDQbwXMcBEENngM0ggAODBaTARpBgCAIeHJUb+h1fmBmM97degRaPz88M6YX/v1rDgBgzm0xWL0rA/NGxuP9/dYf9J8e3d0+Ou9vXx3EoE7h0rseWszw0wVcfe9UYs6orljx/VEMjLK+93Jy86AWNFh4V3+s3JZmn8ZnO88BIPrqNFmDw6ih3cezEHE1ffPBMwAAvaDBlpRsp/eZH0K1wI7UXMkNA9r5c/j1TJ79M2TltnN4Zmwf/JrG48j5Anv68XNaWCxm5BWV2dOKi0tgqCrHwrv6430VB7PJaP9cAOBwvNX2mxPwghaMU+GBm2Px/u50GEzWuzvec3N3VBst2HTwPEwicGO39qgxW6fApVwoRlSoDhUGM7KuVMLMrIEle5DJLKLILF1vDQAulEpHO9Vl3eECaFQ8IoP87EEknUYFraCCn1oFP4GHv6CCn2D9v5/9/1efq1Xw11j/r1Vb0/01KvhdnSLreP7yTjdEauzUOV+eJueTAaKMjAxZWmRkpGLe9u3be7T99dxndna2PSJZV3l+fn4ICgpCebn0F4SMjAx7gKgl2uN6qGsYd13bKL2mtKCm0lD25p4+QAghrUV9p/u0FU3VDtfj75GnU7maaz+ebOPp9PGGLNTtSZnunjunuVqw3NMyPb3+UQsa+/WLq+Oo69rGcfqd4xR7x/Z27gPb/zmed5mm5jlotVqoeA4WtQqBfmro/bUQBBWYioNKp4Va0KBTiB9CA/wgaK0BBAN3bXS5n85aL4PKz2EfAm6IDrF/8d8VqgPH87i5S5jkbnW/nsyGoPWDSqXG8PgwpJzLB8fzGNkzAmcLqq8GG80Y26cdTudcseed0Lc9zuUWwmJRXa2D/uoXauvx/TExCmU1ZskXb7WgwWO3RsNy9cvxnNtiYKqpsgcgVCo1nh7dHQDwj83V4Hgez03oJbu7ni1Q51ju02N6yQIeNrbtrOUNAGCdmuhcN9trzlMYbfls+1m5Lc3eDtfqm2JPswVhXH1ZNhgM9qmRtu3f+tGMhXdcC9T8o7IcHM/jwRs7obLWIgtgWNvcFsTgrh6DGtFhOnsZOj8tOJ5ze4djZ55MQ3XOZ3tvKKW7W46irumzSgs+K73/bJ8hdX22OKfVZ6qsWtBA0GqhUqnRIUiLED8VLIL1JgRJ3SMAACfOXQLH83jq9m6K02Edz5F5I+Nh5lSoqDWjstaMiloTKgxmlFbWYNPBTBjMFjBegEkEukTocPxiGTqE6FBttKCi1owKgxmVtSbUmKzLoVhEoEYUkVWkPOWuKfGcNOCpUnEwmCzgOQ7fny6BoLK9Zv1XUFmniOaVVuPoR4egsvWfdeYrGBNxsbgaqesPg+d4cBxgG0hkrmn+42kMnwwQlZWVydL0er1iXp1O59H213OfrvbvrjznAFFd5TVlexgMBoch
7tfyO9fJ1baVZSW4cuWK/UPF8bly/mKYTSaIFjOuXLFGmivLrPOIbc+rykrA8bzD6yUoLCxEVVkJ1FotmMhgdJq/azYZZelKad6Q11vr5Q15vbVedLzUNnS81DYqlRqFhTpUlZXAIpq9or6+fgxqtQaFhYX2a4P6HK9tW1fXBp7WS63WSK4/bO3S2HZ0LpfjeY/qK1rM9nwW0ey2HRzzOpap1DbO113O7a3UDq7SHPusPnWoq39s+7NdUzpeDzoGXGxlOvaDrX1d1auuPnN1LjqmA5CUYTsXlK5flerr6twHILt2dj5+pX5z7B/b9bdzPud+cK6vY9t4cg1vq6vjPp37y9V72t37xFUZtja3fRdwbi9X54Pje6euc9Sx7er6bHSV7rgv5/ZVqoNjm7s7T50/b9y1r6vjdS5T6bNJ6buc8zlSXFQIrVYLDYAwFRCmB6DnYNCrcFBTA4vaDK2/HiqVGtNvDsNH+8vw5Kho2Xn9zo/HUWsyg9foYGE8xvbrACPjUVlrhsFsQa1JRI1JhNFsQe3Vqa41JgsMJhEGiwW1JgaDyfaa9d/aq9Nia83W0XfORNgmmspVyGd/SlwqdP1d+uKVUvm+rvaJq2nILY75oEcffZTBOjHZ/njkkUcU8+7cuVOWV6VSteg+9+/fL3sdABNFUbG86OhoWd5PPvnE/nq3bt1kr69du1axrFdffVWWd9SoUW6PfcmSJYr1pQc96EEPetCDHvSgBz3oQQ960IMe9XucO3eurhBEi/DJEURKo2BMJpNCTuX0htyxqyn3qVSWbTulRZgbUl5TtseLL76IhQsX2p+XlpYiNjYW2dnZCA4Odrst8V3l5eWIjo5GTk4OgoLk6zqR1oH6ue2gvm4bqJ/bDurrtoH6ue2gvm4bysrKEBMTg7CwsJauiiKfDBApBSWqquS3J3SV3pCgRlPu09X+q6qqFANEDSmvKdtDq9UqDiUNDg6mD682ICgoiPq5DaB+bjuor9sG6ue2g/q6baB+bjuor9sG3s1aVi3JO2tVh/h4+V0hLl9Wvl1iQYH8dpRK21/PfcbExCgGgpTKq6mpQYXCxEfH8lqiPQghhBBCCCGEENJ6+GSAaPDgwbK0M2fOKOY9deqULG3QoEEtuk9BENCvXz9ZntOnT3uUFhERYb+Dmau6KW3nSd0IIYQQQgghhBDS9vhkgCghIQGxsbGStNLSUhw6dEiWd8eOHbK08ePHt/g+77rrLo+286SscePGyfLs27dPtt7Q5cuXkZqaKknT6/VISkqSbe+OVqvFkiVLFKedkdaD+rltoH5uO6iv2wbq57aD+rptoH5uO6iv2wZv72eOMW+9v5p7S5YswdKlSyVpY8aMwQ8//ABBEAAA69atw6xZsyR5unXrhrNnz4LjOHvaiBEjsHfvXkm+zMxMxMXFNds+z507hx49ekAUr91mT6fT4ffff0dCQgIA4NKlS0hMTEReXp6kvH379uG2226TpCUlJWHfvn2y+v7lL38BADDGMGvWLKxfv16SZ9asWfjoo49ACCGEEEIIIYSQtstnA0Tl5eXo3bs3Ll26JEnv2bMnkpKSkJOTg23btkkCMADw3XffYdKkSZI0TwNETblPAHjqqaewatUqSZper8fkyZOh0WiwZcsWFBUVSV6fMmUKvv76a1lZv//+O2655RbZvocPH47evXsjOTkZR44ckbwWGBiItLQ0dO7cWVYeIYQQQgghhBBC2g6fDRABwO7duzF+/HjU1NR4lH/evHmygAzgeYCoKfcJABUVFRg5cqTiNDUl8fHx2L9/Pzp06KD4+htvvIEXX3zRo7JUKhU+++wzTJs2zaP8hBBCCCGEEEIIab18cg0im9tvvx0//PCDZMFmJYIg4OWXX8a7777rVfsMDAzEzz//jIkTJ9a536FDh2L37t0ug0MA8MILL2DlypXQ6XRuywoPD8fnn39OwSFCCCGEEEIIIYQA8PEAEWAN2Jw+fRqrVq3C6NGjERUV
BY1Gg7CwMPTv3x+LFi3C8ePH8frrr0vWAPKWfYaEhGDz5s34+eefMWPGDMTHxyMgIAD+/v6Ii4vD1KlT8dVXX2H//v0eTQV7+umncfr0aSxevBiJiYmIiIiAIAiIjIzEsGHDsGzZMqSnp+Pee+/1+Hirq6uxZs0a3HHHHYiOjoafnx8iIiIwcOBAPP/88y7vmEaaT0lJCbZt24alS5di/PjxiIiIAMdxkseIESM8Lo8xhq+//hr3338/unbtCr1ej6CgIPTo0QOPPvoodu7cWa/6nT59Gs8//zwGDhyIiIgI+Pn5ITo6GmPHjsWaNWtQXV1dzyNuu06fPo1//etfmDlzJm6++WZ06NAB/v7+0Gg0aNeuHYYOHVrv9+HOnTvx6KOPokePHggKCoJer0fXrl1x//3345tvvqlX/XJycvDaa69hyJAhiIyMhFarRVRUFIYPH47ly5ejuLi4vofcJlVUVOC7777DK6+8grFjxyIhIQEdOnSARqOBv78/IiMjccstt2DevHn1ej9SX3u/8vJyREdHyz7DOY5DVlZWndtTH3snpf6s61FbW+u2zOTkZDz11FNISEhAaGio/Vrx7rvvxoYNG2A2mz2uX3FxMZYvX47hw4cjKioKWq0WkZGRGDJkCF577TVcvHixsU3QJlksFmzcuBGPPPIIevbsiZCQEPs10ODBgzFr1ix8+umnKCgocFsOva+9R1ZWVoPez558jlM/ey+LxYJNmzbh4YcfRr9+/RAaGgpBEKDT6RAVFYURI0Zg8eLFOHv2rEfl+VRfM0Lc2L17N4uJiWEAXD4EQWCvvPIKE0WxpavbZsTFxbntEwAsKSnJo7Kys7PZ0KFD6yxv8uTJrLS01G1Zoiiyl156ianVardlxcbGsr179zZBS7Ru06dPr7NfbA+e59mzzz7LTCaTy/JKSkrYpEmT6ixr2LBhLCcnp876vfPOO0yv17stKyIigm3atKkpm6VV+uabbzzua1sfXbp0yWV51Ne+48knn3TZppmZmS63oz72bvV5P9seNTU1imUZDAb26KOPMo7j3G7ft29fduLEiTrr9uWXX7Lw8HC3Zen1erZ69eqmbpZWbd++faxPnz4e9fWf/vQnxTLofe19MjMzG/R+dvc5Tv3s3c6cOcMSEhI86l+VSsVeeOEFl9+DfbGvKUBEXNq1axfz9/f3+ANw3rx5LV3lNiM2NrbO/vAkQJSbm8u6du3qcR8PGTKEVVZWuixvzpw5Hpel0+koSFSHyZMn1/tCZPr06YpllZeXs8TERI/LiY+PZ3l5eS7r9ve//93jslQqFfv888+bqZVah/oGiACw/v37M4PBICuL+tp3/PLLL26/9LsKEFEfe7/6vp8B5QCR2Wxm48eP97iMdu3asbS0NJf1+vTTTxnP8x6Xt3z58uZsplZj48aNTKPReNyuSgEiel97p8YGiJy/9FM/e7fS0lLWuXPnevfzX//6V1lZvtrXPr1INWk+dd2xLTs7G9u3b/f4jm2kacXFxeHChQsAALVaje7du+PUqVOSPElJSdizZ4/bciZOnIjvv/9ekhYWFoZJkybBYDBg8+bNqKqqkry+YMECvPXWW7KyvvrqK9nURZ7ncccddyAmJgZ79+7FmTNnJK9HR0cjLS0NAQEBbuvZVt1999347rvvAFinKwwaNAgJCQngOA6HDx9Gamqq4nY7duzAqFGjJGmu7po4adIkaLXaet01MTk5GTfffDPdNbEJffvtt5gyZQo0Gg0GDBiA7t27IzQ0FBUVFUhLS3N5M4Mvv/wS9913nySN+to3GI1GDBw4UPbZ7cjVDTOoj72f8xIDM2fORFBQkNtt3nzzTajVaknaihUrsGjRIkmaIAiYMGECwsPD8dNPP8mmgyUmJuLgwYPgeelKEhcuXEBCQoLs7/qgQYNw44034tSpU9i3b5/kNZVKheTkZNxwww1u696WHTt2DDfddBOMRqMkXafTYfjw4YiLi4Moirhw4QKS
k5NRXFyMP/3pT3jzzTcl+el97Z2Ki4uxdOnSOvMdOnQIv/76qyQtMTERycnJkjTqZ++m9JkLWPvyhhtuQGFhIX744QfZ+z0kJASXL1+GRqOxp/lsXzcorERavcWLF8sikaNHj2ZGo9GeZ+3atbI83bp1o6lm18H8+fPZ8uXL2f79+1l1dbXirxt1jSDatWuXbJuOHTuy3Nxce57U1FSm0+lkEenz589LyhJFUXHa27p16+x5jEYjGz16tCzP0qVLm7JpWpXJkyezoKAg9sILL7CLFy9KXhNFkb377ruKvxrMmDFDkjcjI0P2i7FOp2Opqan2PLm5uaxjx46ysvbt2yer1/Dhw2X5lixZIqmb0vS4WbNmNW0DtSInT55kW7ZsYdXV1Yqv79y5U3FE58KFCyX5qK99h/Pf2bCwMFk7Ko0goj72DZ70ZV3Ky8tZQECApBye59nOnTvtecrKyli/fv1k+9uwYYOsvEceeUTx74XjdZvS9d/IkSMb1AZtgSiKrH///rI2e+ihh1hRUZEsv9lsZnv27GHff/+9JJ3e177v5ptvlrXnJ598IslD/ez97rnnHll7LViwQJLn0KFDTBAEWb5jx47Z8/hyX1OAiMiIoqi47tChQ4dkeZXmZ+7evfv6V7qNa0iA6KGHHpJt8+abb8ryzZs3z+0HEmOM7dixQ5anb9++srKSk5Nl+bp06UJBRRc2bNjA8vPz3eZRmtc8aNAgSZ5XXnlFlmf+/PmyspYvXy7L5zxlLSMjQ5YnLCxMEjxmjLH8/HzZH0a9Xs8qKioa1hhEcZrJU089JclDfe0bTp48KZmOMmvWLJaUlCRrb6WgAvWxb/CkL+vy73//W1bOhAkTZPk2btxY5zVAeXm54g8+zn9jDAYDCw0NleTjOE72wxCx+vHHH2Vtf+edd9b7uobe175N6fq2Q4cOsmng1M/eT+layzHwYzNw4EBZviNHjthf9+W+9vm7mJGml5qaiuzsbElaaGgoBg8eLMs7evRoWZrzlCXinbZu3SpLGzNmjCzNkz72tKzExESEhIRI0jIzM3Hy5Mm6qtsmPfzww4iMjHSbJykpSZZWU1Mjed6Yvv7hhx/qLCspKQmCIEjSIiMj0a9fP0laVVVVndMeiTLGmOxzGQB69OgheU597f1EUcRjjz1mH54eGRkpm2riDvWxb9qxYwfeeOMN/PnPf8Zf/vIXfPDBBzhx4oTbbTzta+cpxQCwb98+VFRU2J/v2bNHdgfR/v37y/7GaDQaDB8+XJLGGJOdO8Rq/fr1srSG3DmZ3te+7Z133pGlPfHEE5LpRgD1sy9wvq4CgMuXL0uei6KIwsJCSZptyQ8bX+5rChARGef5i4B17SElvXv3lqWlpKQ0eZ1I07pw4YLiLRCV+lmpj1NTUyW3063POdOrVy9ZGp0zDWexWGRpsbGx9v+bTCbFtYqU+kepbwoLC5GTk2N/Tp8P1w9jDBUVFTh48CDuu+8+2ZfJkJAQPPDAA/bn1Ne+Yc2aNThw4ID9+apVqxAaGurRttTHvmv27Nl48cUXsXz5crz22muYM2cO+vfvjwEDBrj8Yc3T/gkNDUWHDh0kaYwxHD16tN5lAdTX9fHLL79Inrdv3x7t27fHokWLkJCQAJ1OB71ej549e2LOnDmKQUF6X/u2goICfPHFF5I0jUaDJ554QpJG/ewbZs+eDZVKJUl75pln8Ouvv6KmpgY5OTl48sknZWu/zZ49276mqq/3NQWIiExGRoYszdUohvbt23u0PfEuSn0UHBwMrVYrS1fqY4PBIPngonOm5dgWsXZ055132v+fnZ0tW0gPUO4fPz8/xUVUHfuH+rr53XnnneA4DjzPIygoCDfffDO++uorSZ7Q0FBs2rQJYWFh9jTqa++Xk5ODl156yf588uTJssX93aE+bn2OHz+OiRMnyhbBNZvNyMrKkuVvaP9QXze9goIC5ObmStKMRiP69OmDFStWIC0tDTU1NaiurkZ6ejo+
+OADDBgwAIsXLwZzuEcQva9923vvvSfrv2nTpsnanPrZN/Tu3RsffvihZHTOqVOnMGzYMOh0OsTExOCDDz6QbHP33XdLRgL7el9TgIjIlJWVydL0er1iXp1O59H2xLs0to+dy6BzpmV8/PHHsjtmhIWFYfr06fbnrtq2of1Dfd3ynn32WZw+fVo2rYT62vvNnTvXPu0nODgYa9asqdf21Met15IlS7Bx40b78/LycsV81Nfe48qVK7K00tJSVFZWutyGMYb/+7//w2uvvWZPo/e17zKZTHjvvfdk6c8884wsjfrZd8ycOROHDx/G1KlT3eaLjIzE1q1b8c0330ja2Nf7mgJERMZ5jjoA2W1XbZznPgJw+4eReIfG9jEg7Wc6Z66/n376CY8//rgkjeM4/Pvf/5as86TUN0DD+4f6uuWtWrUKTz/9tGyaKPW1d/v8888lU4mWLVuGqKioepVBfew71Go1Jk6ciA8//BCpqamorKxEeXk5Dh06hBkzZiiuUfPCCy/Yb2FMfe39SktLXb4WFBSEP/zhD3jooYcU3+evv/46zpw5A4D62pdt2rQJeXl5krShQ4cqrttK/ew7Kioq8NFHH2Hbtm1u812+fBmPP/64bC0yX+9r5ZJJm6YUeTSZTIp5ldJt8y+J92psHwPSftbpdJLFMOtbHp0z9bNx40Y89NBDsuGrb775JqZMmSJJczUCzGQyyRZPtKU7c+5rT7bxpCyibMqUKejVqxdEUURZWRmOHz8uWUvEbDbjiy++wOHDh7F//3772iPU196ruLhY8ovy8OHDZQFeT1Af+47s7Gx07NhRlj548GCsW7cOAwYMwIIFCySvnT9/HikpKRg8eLDbvvY0nfq6eSm95wAgMDAQR44cQbdu3QBYA0nDhg2T3JDDYrFg7dq1WLZsGb2vfZjS4tRPP/20Yl7qZ9+Ql5eHO+64Q7aG0K233oo+ffqguLgYO3bssI/yvHjxImbOnIm8vDy8+OKLAHy/r2kEEZEJDg6WpVVVVSnmVUpX2p54l8b2sXMZdM5cP++99x7++Mc/yoJDf/vb37Bw4UJZfldt29D+ob5ufnPmzMHbb7+Nd955Bx9//DFSUlLw+++/o1OnTpJ8GRkZ9osRgPram7388ssoKCgAYF1v4MMPP6z3XY4A6mNfohQccvTUU08hPDxclm5bjFRpTQqA+tqbuOqjhx9+2B4cAqw3FHAOBgLA//73PwD0vvZVhw4dwm+//SZJ69y5M+655x7F/NTPvmHu3Lmy4NCGDRvw66+/4sMPP8RXX32FM2fOIC4uTpLn1VdfRXp6OgDf72sKEBGZ+Ph4WZrz7f1sbBe8dW1PvItSH5WXl6O2tlaWrtTHWq0W0dHRbsujc6bp/fWvf8WTTz5pn4IAWKeVvfvuu5JAgaOYmBjFXyuU+qempkY2EgyQ9g/1dcu48cYb8fbbb8vSN23aZL+THfW19zp16pT9/zExMVizZg2effZZyUNpEcmlS5faX6+pqaE+bkVUKpXi7ZRtt05Wq9WyLyBAw/uH+rrpde7cGTwv/yrl6Z2KbO1M72vfpDR6aO7cuS6n/lA/e7/i4mLZzV8GDx6Mhx9+WJLWoUMHLFq0SJJmsVjw9ddfA/D9vqYAEZFRmjdrmyftzPGi12bQoEFNXifStGJjYxV/uTx9+rQsTamP+/XrJ/kDqHTOKJXlKp3OGfcYY1iwYAFeeeUVSbpGo8F///tfzJ8/3+W2giCgX79+snSlflBKi4iIkAQD69PX9PnQtJT6sbKy0r5QKvW1b0hPT8fKlStlD+e7IQHAunXr7K8bDAbq41ampKRElua48Kin/VNSUiL7ssBxHAYOHFjvsgDqa0/p9XrFwE99tgfos9sXKd3a3t/f3+3UYepn75eeni65wyAAdO3aVTFvly5dZGmZmZkAfL+vKUBEZBISEhAbGytJKy0txaFDh2R5d+zYIUsbP358s9WNNJ277rpLlqbUn570sadlJScn
y1bS79KlC/r06VNnfdsqs9mM6dOny0aPBAYG4ocffsC0adPqLKMp+3rcuHGyPPv27ZPNeb58+bJsiK5er0dSUlKd9W1rbCOA6uK4foUjf39/+/+pr1s/6mPv991337lcH8Lm5MmTij++OX4ZaUxfDx8+HIGBgfbnSUlJsnUsUlNTZYElo9GIffv2SdI4jlOsCwHGjh0rS1PqV6Uvc46/6tP72re8//77sqn+Dz74oOKPr46on72b0gLPtqCPJ+mt5nqMEaJg8eLFDIDkMWbMGGY0Gu151q5dK8vTrVs3JopiC9a8bcrMzJT1RVJSktttdu3aJdumY8eOLDc3154nNTWV6XQ6SR6VSsXOnz8vKUsURRYXFycrb926dfY8RqORjR49WpZn6dKlTdkUrUp1dTWbOHGirM3at2/PDh8+7HE5GRkZjOd5SRk6nY6lpqba8+Tm5rKOHTvK9rVv3z5ZecOHD5flW7Jkif11URTZjBkzZHlmzZrVqPZorVJSUtiNN97INmzYwMrKyhTz/PbbbywmJkbWpp06dZLko772TklJSbI2qu+jpKSEMUZ97AsGDBjA4uPj2Zo1a1hFRYXs9ZSUFNarVy9Zm/r7+7PKykp7vvLychYQECDJw/M827lzpz1PWVkZ69evn6ysDRs2yPb7yCOPyPLNmDFDct2mdP03cuTIJm6h1uPEiROy9goMDGQZGRn2PCUlJSwhIcHtNRK9r32H0WhkUVFRsrY7fvx4ndtSP3u3oqIiWf8AYJ999pkkX15enuL3nlWrVtnz+HJfU4CIKCorK1P88OvZsyd7/PHH2bhx4xTfQN99911LV71NePfdd9kzzzxjf8ycOVPWF506dZLkeeaZZ9jBgwcl5UyYMEG2XVhYGJsxYwZ74IEHmF6vl72+YMECxTpt2rRJlpfneTZu3Dj2+OOPs549e8pej46OVrx4JlYPPPCArM0AsAkTJsj61vnhbP78+bJy9Ho9e+CBB9iMGTNYeHi47PUpU6Yo1uvgwYOK7//hw4ezOXPmsEGDBileMOfk5DRzi/mmlJQUeztpNBqWmJjIHnroITZ37lw2bdo0NnDgQMXzAAB79dVXZeVRX/smpSBSZmamYl7qY+82YMAAyXt62LBhbObMmWzWrFls2LBhiu0NgL388suyst58801ZPkEQ2D333MMee+wx1rlzZ9nriYmJzGw2y8rKyspS/Ls+aNAgNmfOHMUvICqVql4/SLRF06dPl7VbUFAQu/fee9nDDz+seD0dFxfHDAaDpBx6X/uG//znP7K2u/322z3envrZu40cOVLx83no0KFs9uzZ7A9/+AMLCgqSva7RaFh2drakLF/tawoQEZd27drF/P39Fd8kSo958+a1dJXbjIb+Gu34axVj1sh1165dPd5+yJAhkl83nc2ZM8fjsnQ6Hdu7d28zt5Rva8yoA2fl5eUsMTHR4+3j4+NZXl6ey7r9/e9/97gslUrFPv/88+ZsKp/mGCCqz+PGG29kNTU1svKor31TfQJE1MfezTFA5Olj5MiRrLa2VlaW2Wxm48eP97icdu3asbS0NJd1+/TTT10GqJQey5cvb86mahWKi4sVRwi5evj7+7PffvtNVg69r33DLbfcImu/b775xuPtqZ+925EjR+r1/df2+Mtf/iIry1f7mgJExK1du3ax6OhotyegIAjs5Zdfpqll11FTBYgYY+zChQvs1ltvrXPbSZMmsdLSUrf1EkWRvfjii0ytVrstKyYmhu3Zs6eZWqf1aMoAEWPWYe5KU9acH0OHDvXoF4eVK1fKpiA6P8LDw9nGjRubumlalfoGiNRqNXv88cfdBmupr31PfQJEjFEfe7P777+fcRzn0fuZ53k2f/58Vl1d7bK82tpaNmvWrDrL7Nu3Lztx4kSd9fviiy9YWFiY27L0er1kugRxr6CgQHEavfMjLi5OMThkQ+9r75acnCxrvy5dujCLxVKvcqifvduePXsUp/UrPTQaDfu/
//s/l2X5Yl9TgIjUqaqqiq1atYqNHj2aRUVFMY1Gw8LCwlj//v3ZokWL2KlTp1q6im1OUwaIGLMGdr766is2depUFhcXx/z9/VlAQACLj49nM2bMYDt27KhX/dLS0tiiRYtY//79WVhYGNNoNCwqKoqNHj2arVq1ilVVVTVBK7R+TR0gsvn555/ZjBkzWHx8PAsICGD+/v4sLi6OTZ06lX311Vf1CvZmZ2ezxYsXs8TERBYREcEEQWCRkZFs2LBhbNmyZayoqKixzdDqiaLIDh8+zP7xj3+w++67j/Xr14+FhYUxtVrNBEFgISEhrGfPnuzuu+9mK1asYBcuXPC4bOpr31HfAJEN9bF3ysrKYv/617/Y/fffzwYOHMjCwsKYIAhMq9WyDh06sBEjRrBXX31VtqafOwcPHmRz585lvXv3ZsHBwUyr1bLo6Gg2ceJEtn79emYymTwuq7CwkC1btowNGzaMRUZGMkEQWEREBEtMTGSLFy+mKSgNtHXrVvbwww+zbt26Mb1ez/z8/Fjnzp3Z5MmT2dq1a2XTylyh97V3UlrH680332xwedTP3qu6upp98sknbNq0aaxXr14sODiYqVQqptPpWKdOndiYMWPY66+/7vE1mS/1NceY073cCCGEEEIIIYQQQkibQre5J4QQQgghhBBCCGnjKEBECCGEEEIIIYQQ0sZRgIgQQgghhBBCCCGkjaMAESGEEEIIIYQQQkgbRwEiQgghhBBCCCGEkDaOAkSEEEIIIYQQQgghbRwFiAghhBBCCCGEEELaOAoQEUIIIYQQQgghhLRxFCAihBBCCCGEEEIIaeMoQEQIIYQQQgghhBDSxlGAiBBCCCGEEEIIIaSNowARIYQQQgghDRQXFweO4yQPQgghxBdRgIgQQgghLil9+W3IY8SIES19KG1OVlaWYl9kZWW1dNUIIYQQ4oUoQEQIIYQQ0oJGjBhBQRwvQX1BCCGkLaMAESGEEEIIIYQQQkgbp27pChBCCCHEe82aNQvFxcWy9PLycqxbt06WPnPmTAQFBcnS4+Pjm6V+hBBCCCGkaVCAiBBCCCEuLV68WDE9KytLMUC0ePFixMXFNXOtCCGEEEJIU6MpZoQQQghpVkajEdu2bcNf//pXTJkyBQMGDEB0dDT0ej00Gg0iIiKQmJiIJ554Anv37nVb1owZM2RrxOzZswcWiwUffPABbrvtNkRERIDnecyYMUO2fXJyMqZNm4aOHTvCz88PsbGxmDFjBo4fP+62fFdEUcSmTZvw8MMPo2fPnggJCYFWq0VUVBTuvPNOrF69GjU1NbLtHBeQVjrmLl26eOVaOGlpaXjuuecwZMgQREZGQqPRICwsDDfccAMWLVqEs2fPut3e1Ro/tv4bPnw4wsPD4e/vj549e+KFF15AaWlpnfXavn07JkyYgPbt28Pf3x/dunXDvHnzkJmZ6Xa/QPP1hdFoxNtvv40hQ4YgJCQEer0eAwYMwN///nfFc4IQQghpcYwQQgghpJ4yMzMZANkjMzNTlvfEiROKeV09JkyYwEpLSxX3O336dFn+zZs3s2HDhsnSp0+fLtn2rbfeYjzPK+5TEAT2/vvvK5a/e/duxbqkpKSwXr161Xk8UVFRbO/evR61n7uHUts2VR/Vpaqqis2cOZNxHOe2jmq1mr300kvMYrEolpOUlCTb5sCBAywxMdFlmT169GD5+fku67ZgwQKX2wYEBLDNmzcr7tfWDo3ti9jYWNnrFy5cYH379nW5/S233MKqqqrq3Q+EEEJIc6IRRIQQQgjxKt9//z0efPBBj/PPnTsXv/zyi9s8n3/+ORYuXAhRFBVfN5lMeOKJJ9yOFnJ04MAB3HrrrTh9+nSdeS9duoTRo0dj586dHpXtbWprazFq1CisW7cOjDG3ec1mM/72t79h9uzZHpc/efJkHDp0yOXr6enpWLBggeJry5Ytwz//+U+X21ZWVmLq1KlIT0/3uD5N4fbbb0dqaqrL1w8cOIDX
X3/9OtaIEEIIqRutQUQIIYSQ68LPzw8DBw5Eu3btEBERgYCAAFRUVCAtLQ3JycmS4MMPP/yAffv2Yfjw4XWWe/HiRQBATEwMbr/9dqhUKpw8eRIcxwEAKioqMH/+fMX6TJgwAWFhYdi1axcyMjJw4cKFOvdXWVmJe+65RzZNqGPHjkhKSoJer0dycrJ92hpgDUDdf//9SE9PR0hICIKCgvDMM88AADZt2oTc3FxJWUqLfSst/n09PPfcc/jtt98kaWq1GqNHj0ZsbCwuXryIbdu2wWw2219fu3YtRo4c6VGgr6CgADzPY+zYsYiJicH27dvtU8NsNm7ciLfffhvt27e3p2VmZmLJkiWy8oKDgzFhwgRotVr89NNPuHTpEvLy8lzuvzn64vz589BqtRg/fjzCw8Px3XffoaCgQJLn/fffx2uvvQZBENyWRQghhFw3LT2EiRBCCCG+pz7Tl4qKiti2bdtYdXW1y/K+/vprWVnPPvusLJ/SFDAA7PHHH2dGo1GSt6KigjHG2OrVq2X5AwMD2bFjx+x5jUYju/vuuxXLdp5itmzZMlmeBx98kNXW1kryLV68WJbvtddekx2Tu+lPjdEUU8xycnKYIAiyKXNnzpyR5Dt69CgLDAyU5IuPj5dNNVM6VpVKxbZu3WrPU1RUxLp37y7L980330jKeu6552R5OnXqxHJycux5Kioq2JAhQzxuh4b0hdIUs4CAAHb48GF7nszMTBYeHi7Ll5KS4r4DCCGEkOuIppgRQgghpFmFhYXhjjvugL+/P9LS0vDZZ5/h9ddfxwsvvIAFCxbg2WefVZzadeTIEY/K79GjB1avXi0biREQEAAA2Lp1q2ybxx9/HP3797c/FwQBK1asAM/XfWm0adMmyXOtVotVq1ZBq9VK0l999VX4+/u73dbbbd68GSaTSZL2yiuvoEePHpK0AQMG4I9//KMkLSMjA0ePHq1zH3fffTfGjRtnfx4WFoapU6fK8jmPKlLq1xdeeAGdO3e2Pw8ICMAbb7xRZx2a2pw5czBo0CD787i4ONx5552yfM7HRAghhLQkmmJGCCGEkGa3adMmvPLKKzhz5ozH2xQWFnqU74EHHoBa7fqSRilIMWXKFFla165dccMNN+Dw4cMuy7JYLLLXDQYDQkNDPapramoqKisr7cErb3fw4EFZ2ty5czF37lyPtj9w4IAkUKLkgQcekKV17NhRllZRUWH/v9FoxKlTp2R5lPp1xIgRCA8PR1FRkSdVbhINOSZCCCGkpdEIIkIIIYQ0q3fffRf33XdfvYJDAFBdXe1RPseRQEqUAgPR0dGKeV2lO5blaqFrTzDGcPny5QZvf71duXKlUdvn5+fXmSc2NlaW5ufnJ0tzbPeSkhJZP6jVasUgDFB3vza1hhwTIYQQ0tIoQEQIIYSQZlNYWIjnn3++QduyOu6YZRMSElLvsp2nftnYFrZuTpWVlc2+D2/hybGGh4fL0lQqVb33pdFoXE4RvB796qipjokQQgi5nmiKGSGEEEKazfbt22V3+2rfvj1WrVpln/rD8zwMBoPiCAtP1PXlPyIiwn6nM5vc3Fy0a9dOljcnJ8dtWbb6Oo78CAoKwsyZMz2ub0REhMd5W5rjXcNs7r33XnTq1Mmj7T25C11DhIaGyvqhuroapaWligHDuvqVEEIIIRQgIoQQQkgzys7OlqX9+c9/xn333SdJU1rrpqkMHDhQFiD6+eefMXDgQElaZmYmUlJS3JalUqkwaNAgHDp0yJ5WUVGBP/3pTx5NY7JYLLKRJEojSywWS51lXQ833ngjPv74Y0nawIED8fLLL9e5rdKxNhWNRoM+ffogNTVVkv7zzz/Lzq19+/Z5vJ6VN/cFIYQQ0txoihkhhBBCmo1Go5GlHTt2TPI8OzsbTzzxRLPV4a677pKlLV++HBkZGfbnZrMZixYt8igYcM8990ieM8Zw7733Ijc3VzF/eXk5Nm7ciAkTJuBvf/ub7HWlBavT0tLq
rMf1MHHiRNkC4K+//jq2bNmimF8URfz222/405/+hCFDhjRr3ZT69ZVXXpGsm1RVVYUXXnjB4zK9uS8IIYSQ5kYjiAghhBDSbAYPHixL++STT3D69GkMGjQI+fn5+Pnnnz1ekLohHnroIbz66quSxaqvXLmCgQMHYsKECQgJCcGuXbtw9uxZj8qbP38+Vq5cKVls+vfff0dcXBySkpIQGxsLjUaD4uJinD59GqdOnbLfKj4xMVFWXvfu3WVpjzzyCCZMmGBfyyYhIQGzZ8+u13G7snTpUgQFBbnNM3/+fMTHxyMmJgazZ8/Gv/71L/trtbW1mDRpEnr37o0BAwYgPDwclZWVyMrKwvHjx1FSUgJAeaHmpvTkk09i5cqVMBgM9rT09HT07t0b48ePhyAI+Omnn1wG7pRc774ghBBCvAkFiAghhBDSbG677TYMGDBANmooOTkZycnJ9udjx47Ftm3bmqUOgYGBWLVqFe6//35JelVVFb744gv7c7Vaja5duyI9Pb3O8r7++muMGjUKtbW19nSz2YydO3fWu3533XUXVqxYIUkrLS3Fp59+an8+fvz4JgtKrFu3rs48d999N+Lj4wEAK1aswJEjR2TTAE+dOqV4q/nrJS4uDq+99ppshFBRURE2bNhgfx4QEICwsDDF6Y7OrndfEEIIId6EppgRQgghpNnwPI8vv/zS7aLGt9xyCz7//PNmrccf//hHvPXWWy7vcuXn54f169fjpptukr2m1WplabfeeisOHDiAhIQEj+vQsWNHDBgwQJY+cuRITJ482eNyrjd/f3/s3LkTjz32mMv2cyYIAkaOHNnMNQOef/55LFiwwOXrISEh+PbbbxETEyN7Talfvb0vCCGEkOZEASJCCCGENKsePXogJSUFixYtQvfu3aHRaBAaGoohQ4Zg5cqV2Lt3b4NuVV9fCxYswIEDB3DfffehQ4cO0Gg0iImJwcyZM3Ho0CE8+OCDitORlO52BlgXaz5x4gS2bNmCWbNmISEhAaGhoVCpVNDr9YiNjcWYMWPw0ksvYffu3cjJycGUKVMUy9q0aRNWrlyJW265BcHBwdf9tux10ev1+PDDD3HmzBm8/PLLGD58ODp06ACtVguNRoN27drhpptuwqOPPopPP/0UeXl5WLt27XWp21tvvYVt27Zh/PjxiIiIgFarRdeuXTF//nwcP34co0aNUuxXV3eT8/a+IIQQQpoLxxhjLV0JQgghhJCWdvnyZcTFxUmmjQUFBaGoqEi2UDPxHcePH5eN3OrTpw9OnjzZQjUihBBCvBONICKEEEJIq5eXl4dFixa5XF8oLy8PU6dOlQSHAOuaNBQc8l4pKSlYunQpLl68qPj62bNnZWtPAcCkSZOau2qEEEKIz6ERRIQQQghp9bKystClSxcA1ilvAwcORLt27WAymZCRkYFffvkFRqNRso1KpcLx48fRp0+flqgy8cCePXtw++23g+M49O3bF/369UNoaCgMBgNOnTqFAwcOQBRFyTZBQUHIyMhwOXWQEEIIaavoJzFCCCGEtCnp6el13qkMAN577z0KDvkIxhhOnDiBEydOuM2nVqvxxRdfUHCIEEIIUUBTzAghhBBCHERFRWHLli147LHHWroqpAn16NEDe/fuxZ133tnSVSGEEEK8Ek0xI4QQQkirxxjDgQMHsGXLFhw8eBAXL15EQUEBqqqqEBQUhI4dO2Lw4MEYN24c7rnnHmg0mpauMvGAxWLBrl27sHXrVhw6dAiXLl3ClStXUFtbi+DgYHTu3Bk33ngjJk2ahLvuugs8T7+NEkIIIa5QgIgQQgghhBBCCCGkjaOfUQghhBBCCCGEEELaOAoQEUIIIYQQQgghhLRxFCAihBBCCCGEEEIIaeMoQEQIIYQQQgghhBDSxlGAiBBCCCGEEEIIIaSNowARIYQQQgghhBBCSBtHASJCCCGEEEIIIYSQNo4CRIQQQgghhBBCCCFtHAWICCGEEEIIIYQQQtq4
/wdIgVf1Fuzd+wAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(12,8))\n", + "\n", + "plt.rcParams[\"font.weight\"] = \"bold\"\n", + "plt.rcParams[\"axes.labelweight\"] = \"bold\"\n", + "plt.rcParams[\"font.size\"] = 20\n", + "\n", + "sns.histplot(data=dic,x='rec_len',kde=True,binwidth=1, stat='density', common_norm=False)\n", + "\n", + "plt.xlim(0,800)\n", + "\n", + "ax = plt.gca()\n", + "\n", + "# Set font size and weight for labels and ticks\n", + "ax.set_xlabel(\"Target Length\", fontsize=20, weight='bold')\n", + "ax.set_ylabel(\"Fraction of Targets\", fontsize=20, weight='bold')\n", + "\n", + "plt.tight_layout() # Add this line\n", + "\n", + "# plt.savefig('/datapool/data2/home/jiahan/ResProj/PepDiff/frame-flow/Data/Models_new/Results/plot/data2.png',dpi=1200,bbox_inches='tight')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1a1r\n", + "4 1a38\n", + "15 1abi\n", + "17 1acy\n", + "25 1ai1\n", + " ... \n", + "13293 6v8o\n", + "13297 6vdb\n", + "13298 6vfs\n", + "13299 6vfx\n", + "13300 6xvd\n", + "Name: pdb, Length: 9024, dtype: object" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dic['pdb']" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "8843" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pepbdb_ls = list(set(dic['pdb']))\n", + "len(pepbdb_ls)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pdbpeppep_lenresnopep_atomrec_atomtyperec_len
01a1rC162.5001081117prot139.625
41a38P53.350471730prot216.250
151abiI202.3011532039prot254.875
171acyP103.000791700prot212.500
251ai1P102.801801700prot212.500
\n", + "
" + ], + "text/plain": [ + " pdb pep pep_len res no pep_atom rec_atom type rec_len\n", + "0 1a1r C 16 2.50 0 108 1117 prot 139.625\n", + "4 1a38 P 5 3.35 0 47 1730 prot 216.250\n", + "15 1abi I 20 2.30 1 153 2039 prot 254.875\n", + "17 1acy P 10 3.00 0 79 1700 prot 212.500\n", + "25 1ai1 P 10 2.80 1 80 1700 prot 212.500" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dic.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# 2.biolip filtering" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "lig_path = \"/datapool/data2/home/jiahan/Data/BioLiP/nonredund_lig_fix\"\n", + "rec_path = \"/datapool/data2/home/jiahan/Data/BioLiP/nonredund_rec/\"\n", + "data_path = \"/datapool/data2/home/jiahan/Data/PepMerge/\"" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "24629" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tab = pd.read_csv(\"/datapool/data2/home/jiahan/Data/BioLiP/Nonredund_stru_annotations.csv\")\n", + "tab = tab[tab['Resolution (Å)']<=4]\n", + "tab_ls = list(set(tab['PDB ID']))\n", + "len(tab_ls)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1402" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lip_dic = {}\n", + "with open(\"/datapool/data2/home/jiahan/Data/BioLiP/PIII_nonredund.txt\",'r') as f:\n", + " for line in f:\n", + " line = line.strip().split(',')\n", + " pdb = line[1].split('_')[0]\n", + " if pdb not in pepbdb_ls and pdb in tab_ls: # not in pepbdb and <= 4 A\n", + " seq_lig = get_fasta_from_pdb(os.path.join(lig_path,line[1]+'.pdb'))['A']\n", + " # if 4<=len(seq_lig)<=25:\n", + " if 
3<=len(seq_lig)<=25:\n", + " lip_dic[line[1]] = line[0]\n", + "len(lip_dic) # 1322" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# 3. Merge Datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "data_path = \"/datapool/data2/home/jiahan/Data/PepMerge_new/\"" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "def save_pair_lip(lig,rec,data_path = \"/datapool/data2/home/jiahan/Data/PepMerge/\"):\n", + " lig_parser, rec_parser = PDBParser(), PDBParser()\n", + " lig_structure, rec_structure = lig_parser.get_structure('lig', os.path.join(lig_path,f'{lig}.pdb')), rec_parser.get_structure('rec', os.path.join(rec_path,f'{rec}.pdb'))\n", + " ligs = [atom for atom in lig_structure[0].get_atoms() if atom.get_name() == 'CB']\n", + " recs = [atom for atom in rec_structure[0].get_atoms() if atom.get_name() == 'CB']\n", + " search = NeighborSearch(recs)\n", + " near_chains = []\n", + " for atom in ligs:\n", + " near_chains += search.search(atom.get_coord(), 6.0, level='C') # nearest 6 A Cbeta\n", + " near_chains = list(set([chain.get_full_id() for chain in near_chains]))\n", + " \n", + " class ChainSelector(Select):\n", + " def accept_chain(self, chain):\n", + " return chain.get_full_id() in near_chains\n", + " \n", + " os.makedirs(os.path.join(data_path,lig),exist_ok=True)\n", + " io = PDBIO()\n", + " io.set_structure(rec_structure)\n", + " io.save(os.path.join(data_path,lig,'receptor.pdb'), ChainSelector())\n", + " lig_structure[0]['A'].id = lig.split('_')[-1][0] # rename chain\n", + " io = PDBIO()\n", + " io.set_structure(lig_structure)\n", + " io.save(os.path.join(data_path,lig,'peptide.pdb'))\n", + "\n", + " # # lig + rec in the same pdb\n", + " # # 创建一个新的模型\n", + " # new_model = Model.Model(0)\n", + "\n", + " # # 将两个结构中的所有链添加到新的模型中\n", + " # for chain in lig_structure.get_chains():\n", + " # 
new_model.add(chain)\n", + " # for chain in rec_structure.get_chains():\n", + " # new_model.add(chain)\n", + "\n", + " # # 创建一个新的结构并将新的模型添加到其中\n", + " # new_structure = Structure.Structure('new_structure')\n", + " # new_structure.add(new_model)\n", + "\n", + " # # 保存新的结构为一个PDB文件\n", + " # io = PDBIO()\n", + " # io.set_structure(new_structure)\n", + " # io.save(os.path.join(data_path,lig,'combined.pdb')) \n", + " \n", + " lig_seq = get_fasta_from_pdb(os.path.join(data_path,lig,'peptide.pdb'))\n", + " rec_seq = get_fasta_from_pdb(os.path.join(data_path,lig,'receptor.pdb'))\n", + " if 2 * len(''.join(list(lig_seq.values()))) > len(''.join(list(rec_seq.values()))):\n", + " shutil.rmtree(os.path.join(data_path,lig))\n", + " return None\n", + " else:\n", + " lig_chain = ''\n", + " rec_chain = ''\n", + " with open(os.path.join(data_path,lig,'peptide.fasta'),'w') as f:\n", + " for k,v in lig_seq.items():\n", + " # lig_chain += f'{k}'\n", + " f.write(f'>{lig}_{k}\\n{v}\\n')\n", + " with open(os.path.join(data_path,lig,'receptor.fasta'),'w') as f:\n", + " for k,v in rec_seq.items():\n", + " # rec_chain += f'{k}'\n", + " f.write(f'>{lig}_{k}\\n{v}\\n')\n", + "\n", + " return lig" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Preprocess: 9%|▉ | 127/1402 [00:02<00:21, 60.14it/s]/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 129\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 130\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 136\n", + " 
warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 115\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 84\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 160\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 160\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 76\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 28\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 115\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 165\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 315\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at 
line 27\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1164\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1143\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 662\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1103\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 831\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1027\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1143\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1386\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2332\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring 
unrecognized record 'TER' at line 1374\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3086\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3729\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "Preprocess: 18%|█▊ | 254/1402 [00:06<00:31, 36.83it/s]/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 
'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 85\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1972\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 10065\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 98\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 56\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 64\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 48\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 148\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 18\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2639\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring 
unrecognized record 'TER' at line 1221\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 66\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 47\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 111\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 58\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3341\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 50\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1318\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 702\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 544\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: 
PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2040\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2545\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1808\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 79\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1996\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 
'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2360\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 467\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 120\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 145\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 62\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 99\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 555\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1133\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 
'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 24\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 54\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 79\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 631\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 977\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 36\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1054\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 
95\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 109\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1069\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 
'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 67\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 42\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 65\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 43\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3184\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " 
warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 63\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 29\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 476\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 958\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 828\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: 
PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1416\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 41\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 6758\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 46\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 96\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1252\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1625\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 154\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 
'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 45\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4612\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 84\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: 
PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 56\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2369\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3178\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 100\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized 
record 'TER' at line 111\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3364\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 793\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 904\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 51\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 121\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 5388\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' 
at line 27\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2763\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4811\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2795\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 9502\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 759\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 79\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2019\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 84\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 147\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3204\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1315\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 130\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 
'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 20\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 181\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2472\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 586\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 837\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 120\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring 
unrecognized record 'TER' at line 3620\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1502\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 250\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 133\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future 
release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 301\n", + " 
warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 220\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2939\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2432\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 71\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2432\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 87\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 158\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1320\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1533\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 158\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 57\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 876\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 190\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring 
unrecognized record 'TER' at line 54\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 85\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 921\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 221\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1952\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 440\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1449\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 256\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2345\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 65\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 103\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 20\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 35\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a 
future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 92\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 129\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 29\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 120\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 60\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 87\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: 
PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1565\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3695\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 24\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1715\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1933\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 774\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 92\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: 
BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 538\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 103\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 132\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2535\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 993\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 
'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2229\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 145\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3338\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 824\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 116\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4863\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 165\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 117\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 52\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 110\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 97\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 683\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1214\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1053\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 57\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 105\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 389\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 
'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1005\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 59\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 69\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1373\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 16\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4968\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 59\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4230\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 257\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1423\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 967\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 
'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 139\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 133\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 189\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1640\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + 
" warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3971\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 138\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 44\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 105\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 141\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 32\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 59\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " 
warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 110\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 515\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 615\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3085\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 45\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: 
PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2124\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 50\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2571\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 237\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 824\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 
'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 939\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2363\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4411\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 144\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 132\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 5058\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4411\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 43\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 103\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 752\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1525\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 108\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3722\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 40\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 
'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 21\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 40\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1114\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 11233\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 121\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 21\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1423\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 6551\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 173\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 170\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 124\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3565\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2173\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3971\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3621\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 56\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1240\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 138\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 70\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4219\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring 
unrecognized record 'TER' at line 204\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 110\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4219\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 690\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 172\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1133\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 354\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4219\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3057\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4219\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 577\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2486\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2814\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 635\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2939\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 86\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 
'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 39\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 51\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2322\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3407\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 388\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 671\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring 
unrecognized record 'TER' at line 29\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 12854\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 23\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1547\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 34\n", + " 
warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 68\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 13620\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized 
record 'TER' at line 412\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 67\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 52\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 158\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 351\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 115\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2027\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 
130\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 120\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 130\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 142\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2818\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1386\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2219\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 588\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1610\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 796\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1735\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 111\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 134\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 201\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 138\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 599\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2718\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 650\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3173\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1440\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 80\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 184\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 195\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3752\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 42\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 72\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 
'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 516\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 58\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 58\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 698\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 124\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 968\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1017\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring 
unrecognized record 'TER' at line 629\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 124\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4760\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1258\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 124\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1396\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1410\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring 
unrecognized record 'TER' at line 629\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 193\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 193\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 45\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2309\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1936\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2309\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1258\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 85\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2786\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1410\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1410\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 104\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 104\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 141\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1703\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2786\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2786\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: 
Ignoring unrecognized record 'TER' at line 1872\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2394\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 189\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 868\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 5189\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 141\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3739\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 60\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 67\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2719\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 61\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 973\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2134\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 68\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 571\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2719\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1137\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1872\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1946\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 194\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 38\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3744\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 5189\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 51\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 181\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring 
unrecognized record 'TER' at line 81\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 7449\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3057\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 81\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1208\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3057\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 104\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized 
record 'TER' at line 156\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 196\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 968\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2719\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 81\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 5438\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3744\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 38\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 5438\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 799\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1765\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 36\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1936\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3285\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1715\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3285\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1765\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 179\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1421\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 63\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2124\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 63\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1765\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 6165\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 90\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 
'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3532\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 90\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2744\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3532\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 64\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 82\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3532\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized 
record 'TER' at line 126\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 536\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 536\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 103\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1051\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 103\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2477\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2477\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1113\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1051\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 6416\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 717\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 112\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 717\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2970\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1434\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1434\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2970\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: 
BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 183\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4925\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " 
warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 95\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 159\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4925\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2095\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 62\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 5940\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of 
Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 5940\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 62\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 97\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 97\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 
'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 8222\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 112\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4135\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2449\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 112\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1541\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3251\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 64\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 10891\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 29846\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 115\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 
'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2376\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 182\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3082\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 45\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 5438\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3273\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 6165\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1582\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3936\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3936\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1042\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2780\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2838\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 50\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 
'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 388\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 405\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 94\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 78\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " 
warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4752\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2838\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 70\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 88\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 44\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: 
PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 68\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 540\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2035\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 44\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 5791\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1387\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1090\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 7872\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2035\n", + " 
warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 7872\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2280\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 33\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 6416\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3990\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2277\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of 
Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 5791\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2277\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 190\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 152\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3990\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2829\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: 
PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 100\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 163\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 80\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 131\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 131\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4572\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 156\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 590\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2025\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 135\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4572\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1715\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 135\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1541\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1915\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1405\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 127\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 135\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4026\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 28\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1405\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2212\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1405\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 650\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2376\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3082\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1405\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1218\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 831\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1405\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2823\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 195\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1662\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 119\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2823\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3138\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2904\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 95\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 195\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2823\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 288\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1405\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 288\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 576\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4752\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 55\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 576\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2823\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1106\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2823\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1528\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: 
BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 56\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 126\n", + " 
warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3080\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 205\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 728\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2325\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 747\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 38930\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated 
in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1310\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 
'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 5556\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1441\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1528\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a 
future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2635\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 12905\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 6359\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring 
unrecognized record 'TER' at line 12905\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 24673\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3080\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2440\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 25585\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 25702\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 154\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 26557\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: 
BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: 
PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 216\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2823\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " 
warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1951\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4085\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' 
will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 49\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 
'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 25706\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 53\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 153\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2547\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 105\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1667\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1503\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3509\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 206\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 181\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 168\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 84\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 25706\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 87\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 338\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 279\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 25\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 180\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 164\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 80\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 112\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1449\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 
'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2211\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 7518\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1015\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3971\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4085\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3881\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2040\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 
'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3971\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 226\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 50\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 84\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 7518\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " 
warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1830\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 807\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized 
record 'TER' at line 24\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3530\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 106\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 267\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 128\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1888\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 6782\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 852\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in 
a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 5656\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 36\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 48\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3205\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 119\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 147\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 52\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 107\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4085\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1781\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 127\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 49\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3549\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 138\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 38\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 28\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 131\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring 
unrecognized record 'TER' at line 3917\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1276\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 924\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2939\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4392\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3226\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 142\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 
'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 32\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 169\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3312\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: 
PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1018\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 194\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3083\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2190\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized 
record 'TER' at line 6982\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 120\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4766\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 62\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 84\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 66\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 66\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 597\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 159\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 
'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1252\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1139\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 173\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 180\n", + 
" warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 92\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1757\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1209\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 
'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1062\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1730\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3462\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 7223\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 92\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 140\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 64\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 73\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 80\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 147\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 55\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 50\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1491\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 148\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 148\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 206\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 689\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 87\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 831\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2376\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 664\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2189\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1763\n", + " warnings.warn(\n", + 
"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2189\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2189\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2893\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 1676\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 2655\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: 
BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: 
BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: 
BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: 
BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 88\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 41\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 4589\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 194\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 
'TER' at line 3165\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record 'TER' at line 3820\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py:144: BiopythonDeprecationWarning: 'three_to_one' will be deprecated in a future release of Biopython in favor of 'Bio.PDB.Polypeptide.protein_letters_3to1'.\n", + " warnings.warn(\n", + "Preprocess: 18%|█▊ | 254/1402 [00:20<00:31, 36.83it/s]" + ] + }, + { + "ename": "PDBConstructionException", + "evalue": "A defined twice", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + 
"\u001b[0;31m_RemoteTraceback\u001b[0m Traceback (most recent call last)", + "\u001b[0;31m_RemoteTraceback\u001b[0m: \n\"\"\"\nTraceback (most recent call last):\n File \"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/joblib/externals/loky/process_executor.py\", line 463, in _process_worker\n r = call_item()\n File \"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/joblib/externals/loky/process_executor.py\", line 291, in __call__\n return self.fn(*self.args, **self.kwargs)\n File \"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/joblib/parallel.py\", line 589, in __call__\n return [func(*args, **kwargs)\n File \"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/joblib/parallel.py\", line 589, in \n return [func(*args, **kwargs)\n File \"/tmp/ipykernel_794754/1772696423.py\", line 33, in save_pair_lip\n File \"/datapool/data2/home/jiahan/anaconda3/envs/fm/lib/python3.10/site-packages/Bio/PDB/Entity.py\", line 217, in add\n raise PDBConstructionException(f\"{entity_id} defined twice\")\nBio.PDB.PDBExceptions.PDBConstructionException: A defined twice\n\"\"\"", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mPDBConstructionException\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/datapool/data2/home/jiahan/Res Proj/PepDiff/frame-flow/playgrounds/gen_dataset.ipynb Cell 21\u001b[0m line \u001b[0;36m4\n\u001b[1;32m 1\u001b[0m lig_path \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m/datapool/data2/home/jiahan/Data/BioLiP/nonredund_lig_fix\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 2\u001b[0m rec_path \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m/datapool/data2/home/jiahan/Data/BioLiP/nonredund_rec/\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m----> 4\u001b[0m lip_list \u001b[39m=\u001b[39m joblib\u001b[39m.\u001b[39;49mParallel(\n\u001b[1;32m 5\u001b[0m n_jobs \u001b[39m=\u001b[39;49m 
\u001b[39mmax\u001b[39;49m(joblib\u001b[39m.\u001b[39;49mcpu_count() \u001b[39m/\u001b[39;49m\u001b[39m/\u001b[39;49m \u001b[39m2\u001b[39;49m, \u001b[39m1\u001b[39;49m),\n\u001b[1;32m 6\u001b[0m )(\n\u001b[1;32m 7\u001b[0m joblib\u001b[39m.\u001b[39;49mdelayed(save_pair_lip)(lig,rec,data_path)\n\u001b[1;32m 8\u001b[0m \u001b[39mfor\u001b[39;49;00m (lig,rec) \u001b[39min\u001b[39;49;00m tqdm(lip_dic\u001b[39m.\u001b[39;49mitems(), dynamic_ncols\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m, desc\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39mPreprocess\u001b[39;49m\u001b[39m'\u001b[39;49m)\n\u001b[1;32m 9\u001b[0m )\n\u001b[1;32m 10\u001b[0m lip_list \u001b[39m=\u001b[39m [item \u001b[39mfor\u001b[39;00m item \u001b[39min\u001b[39;00m lip_list \u001b[39mif\u001b[39;00m item \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m]\n\u001b[1;32m 11\u001b[0m \u001b[39mlen\u001b[39m(lip_list) \u001b[39m# 1293\u001b[39;00m\n", + "File \u001b[0;32m~/anaconda3/envs/fm/lib/python3.10/site-packages/joblib/parallel.py:1952\u001b[0m, in \u001b[0;36mParallel.__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 1946\u001b[0m \u001b[39m# The first item from the output is blank, but it makes the interpreter\u001b[39;00m\n\u001b[1;32m 1947\u001b[0m \u001b[39m# progress until it enters the Try/Except block of the generator and\u001b[39;00m\n\u001b[1;32m 1948\u001b[0m \u001b[39m# reach the first `yield` statement. 
This starts the aynchronous\u001b[39;00m\n\u001b[1;32m 1949\u001b[0m \u001b[39m# dispatch of the tasks to the workers.\u001b[39;00m\n\u001b[1;32m 1950\u001b[0m \u001b[39mnext\u001b[39m(output)\n\u001b[0;32m-> 1952\u001b[0m \u001b[39mreturn\u001b[39;00m output \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mreturn_generator \u001b[39melse\u001b[39;00m \u001b[39mlist\u001b[39;49m(output)\n", + "File \u001b[0;32m~/anaconda3/envs/fm/lib/python3.10/site-packages/joblib/parallel.py:1595\u001b[0m, in \u001b[0;36mParallel._get_outputs\u001b[0;34m(self, iterator, pre_dispatch)\u001b[0m\n\u001b[1;32m 1592\u001b[0m \u001b[39myield\u001b[39;00m\n\u001b[1;32m 1594\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backend\u001b[39m.\u001b[39mretrieval_context():\n\u001b[0;32m-> 1595\u001b[0m \u001b[39myield from\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_retrieve()\n\u001b[1;32m 1597\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mGeneratorExit\u001b[39;00m:\n\u001b[1;32m 1598\u001b[0m \u001b[39m# The generator has been garbage collected before being fully\u001b[39;00m\n\u001b[1;32m 1599\u001b[0m \u001b[39m# consumed. 
This aborts the remaining tasks if possible and warn\u001b[39;00m\n\u001b[1;32m 1600\u001b[0m \u001b[39m# the user if necessary.\u001b[39;00m\n\u001b[1;32m 1601\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_exception \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n", + "File \u001b[0;32m~/anaconda3/envs/fm/lib/python3.10/site-packages/joblib/parallel.py:1699\u001b[0m, in \u001b[0;36mParallel._retrieve\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1692\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_wait_retrieval():\n\u001b[1;32m 1693\u001b[0m \n\u001b[1;32m 1694\u001b[0m \u001b[39m# If the callback thread of a worker has signaled that its task\u001b[39;00m\n\u001b[1;32m 1695\u001b[0m \u001b[39m# triggered an exception, or if the retrieval loop has raised an\u001b[39;00m\n\u001b[1;32m 1696\u001b[0m \u001b[39m# exception (e.g. `GeneratorExit`), exit the loop and surface the\u001b[39;00m\n\u001b[1;32m 1697\u001b[0m \u001b[39m# worker traceback.\u001b[39;00m\n\u001b[1;32m 1698\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_aborting:\n\u001b[0;32m-> 1699\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_raise_error_fast()\n\u001b[1;32m 1700\u001b[0m \u001b[39mbreak\u001b[39;00m\n\u001b[1;32m 1702\u001b[0m \u001b[39m# If the next job is not ready for retrieval yet, we just wait for\u001b[39;00m\n\u001b[1;32m 1703\u001b[0m \u001b[39m# async callbacks to progress.\u001b[39;00m\n", + "File \u001b[0;32m~/anaconda3/envs/fm/lib/python3.10/site-packages/joblib/parallel.py:1734\u001b[0m, in \u001b[0;36mParallel._raise_error_fast\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1730\u001b[0m \u001b[39m# If this error job exists, immediatly raise the error by\u001b[39;00m\n\u001b[1;32m 1731\u001b[0m \u001b[39m# calling get_result. 
This job might not exists if abort has been\u001b[39;00m\n\u001b[1;32m 1732\u001b[0m \u001b[39m# called directly or if the generator is gc'ed.\u001b[39;00m\n\u001b[1;32m 1733\u001b[0m \u001b[39mif\u001b[39;00m error_job \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m-> 1734\u001b[0m error_job\u001b[39m.\u001b[39;49mget_result(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtimeout)\n", + "File \u001b[0;32m~/anaconda3/envs/fm/lib/python3.10/site-packages/joblib/parallel.py:736\u001b[0m, in \u001b[0;36mBatchCompletionCallBack.get_result\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 730\u001b[0m backend \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mparallel\u001b[39m.\u001b[39m_backend\n\u001b[1;32m 732\u001b[0m \u001b[39mif\u001b[39;00m backend\u001b[39m.\u001b[39msupports_retrieve_callback:\n\u001b[1;32m 733\u001b[0m \u001b[39m# We assume that the result has already been retrieved by the\u001b[39;00m\n\u001b[1;32m 734\u001b[0m \u001b[39m# callback thread, and is stored internally. 
It's just waiting to\u001b[39;00m\n\u001b[1;32m 735\u001b[0m \u001b[39m# be returned.\u001b[39;00m\n\u001b[0;32m--> 736\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_return_or_raise()\n\u001b[1;32m 738\u001b[0m \u001b[39m# For other backends, the main thread needs to run the retrieval step.\u001b[39;00m\n\u001b[1;32m 739\u001b[0m \u001b[39mtry\u001b[39;00m:\n", + "File \u001b[0;32m~/anaconda3/envs/fm/lib/python3.10/site-packages/joblib/parallel.py:754\u001b[0m, in \u001b[0;36mBatchCompletionCallBack._return_or_raise\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 752\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 753\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstatus \u001b[39m==\u001b[39m TASK_ERROR:\n\u001b[0;32m--> 754\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_result\n\u001b[1;32m 755\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_result\n\u001b[1;32m 756\u001b[0m \u001b[39mfinally\u001b[39;00m:\n", + "\u001b[0;31mPDBConstructionException\u001b[0m: A defined twice" + ] + } + ], + "source": [ + "lig_path = \"/datapool/data2/home/jiahan/Data/BioLiP/nonredund_lig_fix\"\n", + "rec_path = \"/datapool/data2/home/jiahan/Data/BioLiP/nonredund_rec/\"\n", + "\n", + "lip_list = joblib.Parallel(\n", + " n_jobs = max(joblib.cpu_count() // 2, 1),\n", + ")(\n", + " joblib.delayed(save_pair_lip)(lig,rec,data_path)\n", + " for (lig,rec) in tqdm(lip_dic.items(), dynamic_ncols=True, desc='Preprocess')\n", + ")\n", + "lip_list = [item for item in lip_list if item is not None]\n", + "len(lip_list) # 1293" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1373" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(lip_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + 
"metadata": {}, + "outputs": [], + "source": [ + "def save_pair_pepbdb(pdb,data_path = \"/datapool/data2/home/jiahan/Data/PepMerge/\"):\n", + " try:\n", + " lig_parser, rec_parser = PDBParser(), PDBParser()\n", + " lig_structure, rec_structure = lig_parser.get_structure('lig', os.path.join(pepbdb_path,pdb,f'peptide.pdb')), rec_parser.get_structure('rec', os.path.join(pepbdb_path,pdb, f'receptor.pdb'))\n", + " ligs = [atom for atom in lig_structure[0].get_atoms() if atom.get_name() == 'CB']\n", + " recs = [atom for atom in rec_structure[0].get_atoms() if atom.get_name() == 'CB']\n", + " if len(ligs) == 0 or len(recs) == 0:\n", + " return None\n", + " search = NeighborSearch(recs)\n", + " near_chains = []\n", + " for atom in ligs:\n", + " near_chains += search.search(atom.get_coord(), 6.0, level='C')\n", + " near_chains = list(set([chain.get_full_id() for chain in near_chains]))\n", + " if len(near_chains) == 0:\n", + " return None\n", + "\n", + "\n", + " class ChainSelector(Select):\n", + " def accept_chain(self, chain):\n", + " return chain.get_full_id() in near_chains\n", + " \n", + " os.makedirs(os.path.join(data_path,pdb),exist_ok=True)\n", + " io = PDBIO()\n", + " io.set_structure(rec_structure)\n", + " io.save(os.path.join(data_path,pdb,'receptor.pdb'), ChainSelector())\n", + " io = PDBIO()\n", + " io.set_structure(lig_structure)\n", + " io.save(os.path.join(data_path,pdb,'peptide.pdb'))\n", + "\n", + "\n", + " lig_seq = get_fasta_from_pdb(os.path.join(data_path,pdb,'peptide.pdb'))\n", + " rec_seq = get_fasta_from_pdb(os.path.join(data_path,pdb,'receptor.pdb'))\n", + " if 2 * len(''.join(list(lig_seq.values()))) > len(''.join(list(rec_seq.values()))):\n", + " shutil.rmtree(os.path.join(data_path,pdb))\n", + " return None\n", + " else:\n", + " with open(os.path.join(data_path,pdb,'peptide.fasta'),'w') as f:\n", + " for k,v in lig_seq.items():\n", + " f.write(f'>{pdb}_{k}\\n{v}\\n')\n", + " with open(os.path.join(data_path,pdb,'receptor.fasta'),'w') as f:\n", + 
" for k,v in rec_seq.items():\n", + " f.write(f'>{pdb}_{k}\\n{v}\\n')\n", + " return pdb\n", + " except:\n", + " return None" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pdbpeppep_lenresnopep_atomrec_atomtyperec_len
01a1rC162.5001081117prot139.625
41a38P53.350471730prot216.250
151abiI202.3011532039prot254.875
171acyP103.000791700prot212.500
251ai1P102.801801700prot212.500
\n", + "
" + ], + "text/plain": [ + " pdb pep pep_len res no pep_atom rec_atom type rec_len\n", + "0 1a1r C 16 2.50 0 108 1117 prot 139.625\n", + "4 1a38 P 5 3.35 0 47 1730 prot 216.250\n", + "15 1abi I 20 2.30 1 153 2039 prot 254.875\n", + "17 1acy P 10 3.00 0 79 1700 prot 212.500\n", + "25 1ai1 P 10 2.80 1 80 1700 prot 212.500" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dic.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'1a1r_C'" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pepbdb_ls = []\n", + "for i,row in dic.iterrows():\n", + " pepbdb_ls.append(f\"{row['pdb']}_{row['pep']}\")\n", + "pepbdb_ls[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "9024" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(pepbdb_ls)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Preprocess: 0%| | 0/9024 [00:00 0: + logger = get_logger('train', None, local_rank) + writer = BlackHole() + else: + run = wandb.init(project=args.name, config=config, name='%s[%s]' % (config_name, args.tag)) + if args.resume: + log_dir = os.path.dirname(os.path.dirname(args.resume)) + else: + log_dir = get_new_log_dir(args.logdir, prefix='%s[%s]' % (config_name, version_short), tag=args.tag) + with open(os.path.join(log_dir, 'commit.txt'), 'w') as f: + f.write(branch + '\n') + f.write(version + '\n') + ckpt_dir = os.path.join(log_dir, 'checkpoints') + if not os.path.exists(ckpt_dir): os.makedirs(ckpt_dir) + logger = get_logger('train', log_dir) + # writer = torch.utils.tensorboard.SummaryWriter(log_dir) + # tensorboard_trace_handler = 
def train(it):
    """Run a single optimisation step and log it on rank 0.

    Pulls the next batch from ``train_iterator``, computes the weighted loss,
    back-propagates, zeroes NaN gradient entries, clips the gradient norm and
    steps the optimizer.

    Args:
        it: Current iteration number; only used as the logging step.
    """
    time_start = current_milli_time()
    model.train()

    # Prepare data
    batch = recursive_to(next(train_iterator), local_rank)

    # Forward pass: the model returns a dict of losses, combined here using
    # the weights from the config.
    loss_dict = model(batch)
    loss = sum_weighted_losses(loss_dict, config.train.loss_weights)
    time_forward_end = current_milli_time()

    loss.backward()

    # Rescue NaN gradients: zero only the NaN entries, in place. Done without
    # a preceding `.any()` test so no host/device sync is forced per tensor.
    for param in model.parameters():
        grad = param.grad
        if grad is not None:
            grad.masked_fill_(torch.isnan(grad), 0)

    # Norm as returned by clip_grad_norm_, kept for logging.
    orig_grad_norm = clip_grad_norm_(model.parameters(), config.train.max_grad_norm)

    # Parameter update
    optimizer.step()
    optimizer.zero_grad()
    time_backward_end = current_milli_time()

    # Logging (rank 0 only)
    if local_rank == 0:
        scalar_dict = {
            'grad': orig_grad_norm,
            'lr': optimizer.param_groups[0]['lr'],
            # presumably milliseconds -> seconds; confirm current_milli_time()
            'time_forward': (time_forward_end - time_start) / 1000,
            'time_backward': (time_backward_end - time_forward_end) / 1000,
        }
        log_losses(loss, loss_dict, scalar_dict, it=it, tag='train', logger=logger)


def validate(it):
    """Evaluate on ``val_loader``, log averaged metrics and step the scheduler.

    NOTE(review): the only call site in the training loop below is disabled,
    and this function relies on ``val_loader``, ``writer`` and ``args.device``,
    none of which are set up in the visible part of this script. Confirm they
    exist before re-enabling validation. It is also the only place where
    ``scheduler.step`` is called.

    Args:
        it: Current iteration number, used as the logging step.

    Returns:
        The average of the accumulated 'loss' metric.
    """
    scalar_accum = ScalarMetricAccumulator()
    with torch.no_grad():
        model.eval()

        for batch in tqdm(val_loader, desc='Validate', dynamic_ncols=True):
            # Prepare data
            batch = recursive_to(batch, args.device)

            # Forward pass
            loss_dict = model(batch)
            loss = sum_weighted_losses(loss_dict, config.train.loss_weights)
            scalar_accum.add(name='loss', value=loss, batchsize=len(batch['aa']), mode='mean')
            # NOTE(review): train() passes loss_dict straight to
            # sum_weighted_losses/log_losses, while this expects a nested
            # 'scalar' entry - verify against the model's actual output.
            for k, v in loss_dict['scalar'].items():
                scalar_accum.add(name=k, value=v, batchsize=len(batch['aa']), mode='mean')

    avg_loss = scalar_accum.get_average('loss')
    summary = scalar_accum.log(it, 'val', logger=logger, writer=writer)
    for k, v in summary.items():
        wandb.log({f'val/{k}': v}, step=it)
    # Trigger scheduler
    if config.train.scheduler.type == 'plateau':
        scheduler.step(avg_loss)
    else:
        scheduler.step()
    return avg_loss


try:
    for it in range(it_first, config.train.max_iters + 1):
        train(it)
        # Validation is currently disabled; a checkpoint is still written
        # every `val_freq` iterations, by rank 0 only.
        if it % config.train.val_freq == 0 and local_rank == 0:
            ckpt_path = os.path.join(ckpt_dir, '%d.pt' % it)
            torch.save({
                'config': config,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                'iteration': it,
            }, ckpt_path)
except KeyboardInterrupt:
    logger.info('Terminating...')
finally:
    # Tear the process group down on every exit path (normal completion and
    # errors too), not only when the run is interrupted from the keyboard.
    distrib.destroy_process_group()