-
Notifications
You must be signed in to change notification settings - Fork 29
Expand file tree
/
Copy pathipsae.py
More file actions
1010 lines (854 loc) · 44.1 KB
/
ipsae.py
File metadata and controls
1010 lines (854 loc) · 44.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# ipsae.py
# script for calculating the ipSAE score for scoring pairwise protein-protein interactions in AlphaFold2 and AlphaFold3 models
# https://www.biorxiv.org/content/10.1101/2025.02.10.637595v2
# Also calculates:
# pDockQ: Bryant, Pozotti, and Eloffson. https://www.nature.com/articles/s41467-022-28865-w
# pDockQ2: Zhu, Shenoy, Kundrotas, Elofsson. https://academic.oup.com/bioinformatics/article/39/7/btad424/7219714
# LIS: Kim, Hu, Comjean, Rodiger, Mohr, Perrimon. https://www.biorxiv.org/content/10.1101/2024.02.19.580970v1
# Roland Dunbrack
# Fox Chase Cancer Center
# version 4
# January 3, 2026: Fixed Boltz2 issues (PDB and mmCIF format; chainIDs)
# MIT license: script can be modified and redistributed for non-commercial and commercial use, as long as this information is reproduced.
# includes support for Boltz structures and structures with nucleic acids
# It may be necessary to install numpy with the following command:
# pip install numpy
# Usage:
# python ipsae.py <path_to_af2_pae_file> <path_to_af2_pdb_file> <pae_cutoff> <dist_cutoff>
# python ipsae.py <path_to_af3_pae_file> <path_to_af3_cif_file> <pae_cutoff> <dist_cutoff>
# python ipsae.py <path_to_boltz_pae_npz_file> <path_to_boltz_cif_file> <pae_cutoff> <dist_cutoff>
# python ipsae.py <path_to_boltz_pae_npz_file> <path_to_boltz_pdb_file> <pae_cutoff> <dist_cutoff>
#
# All output files will be in same path/folder as cif or pdb file
import sys, os, math
import json
import numpy as np
np.set_printoptions(threshold=np.inf) # for printing out full numpy arrays for debugging
# Input and output files and parameters
# Ensure correct usage
if len(sys.argv) < 5:
print("Usage for AF2 (PDB format):")
print(" python ipsae.py <path_to_pae_json_file> <path_to_pdb_file> <pae_cutoff> <dist_cutoff>")
print(" python ipsae.py RAF1_KSR1_scores_rank_001_alphafold2_multimer_v3_model_4_seed_003.json RAF1_KSR1_unrelaxed_rank_001_alphafold2_multimer_v3_model_4_seed_003.pdb 10 15")
print("")
print("Usage for AF3 (mmCIF format):")
print(" python ipsae.py <path_to_pae_json_file> <path_to_mmcif_file> <pae_cutoff> <dist_cutoff>")
print(" python ipsae.py fold_aurka_tpx2_full_data_0.json fold_aurka_tpx2_model_0.cif 10 15")
print("")
print("Usage for Boltz (PDB or mmCIF format):")
print(" python ipsae.py <path_to_pae_npz_file> <path_to_mmcif_file> <pae_cutoff> <dist_cutoff>")
print(" python ipsae.py <path_to_pae_npz_file> <path_to_pdb_file> <pae_cutoff> <dist_cutoff>")
print(" python ipsae.py pae_AURKA_TPX2_model_0.npz AURKA_TPX2_model_0.cif 10 15")
print(" python ipsae.py pae_AURKA_TPX2_model_0.npz AURKA_TPX2_model_0.pdb 10 15")
sys.exit(1)
pae_file_path = sys.argv[1]
pdb_path = sys.argv[2]
pae_cutoff = float(sys.argv[3])
dist_cutoff = float(sys.argv[4])
pae_string = str(int(pae_cutoff))
if pae_cutoff<10: pae_string="0"+pae_string
dist_string = str(int(dist_cutoff))
if dist_cutoff<10: dist_string="0"+dist_string
if ".pdb" in pdb_path and pae_file_path.endswith(".json"):
pdb_stem=pdb_path.replace(".pdb","")
path_stem = f'{pdb_path.replace(".pdb","")}_{pae_string}_{dist_string}'
af2 = True
af3 = False
boltz = False
cif = False
elif ".cif" in pdb_path and pae_file_path.endswith(".json"):
pdb_stem=pdb_path.replace(".cif","")
path_stem = f'{pdb_path.replace(".cif","")}_{pae_string}_{dist_string}'
af2 = False
af3 = True
boltz = False
cif = True
elif ".cif" in pdb_path and pae_file_path.endswith(".npz"): # Boltz1/2 in cif format
pdb_stem=pdb_path.replace(".cif","")
path_stem = f'{pdb_path.replace(".cif","")}_{pae_string}_{dist_string}'
af2 = False
af3 = False
boltz = True
cif = True
elif ".pdb" in pdb_path and pae_file_path.endswith(".npz"): # Boltz1/2 in pdb format
pdb_stem=pdb_path.replace(".pdb","")
path_stem = f'{pdb_path.replace(".pdb","")}_{pae_string}_{dist_string}'
af2 = False
af3 = False
boltz = True
cif = False
else:
print("Wrong PDB or PAE file type ", pdb_path)
sys.exit()
file_path = path_stem + ".txt"
file2_path = path_stem + "_byres.txt"
pml_path = path_stem + ".pml"
OUT = open(file_path,'w')
PML = open(pml_path,'w')
OUT2 = open(file2_path,'w')
# Define the ptm and d0 functions
def ptm_func(x,d0):
return 1.0/(1+(x/d0)**2.0)
ptm_func_vec=np.vectorize(ptm_func) # vector version
# Define the d0 functions for numbers and arrays; minimum value = 1.0; from Yang and Skolnick, PROTEINS: Structure, Function, and Bioinformatics 57:702–710 (2004)
def calc_d0(L,pair_type):
L=float(L)
min_value=1.0
if pair_type=='nucleic_acid': min_value=2.0
if L>27:
d0=1.24*(L-15)**(1.0/3.0) - 1.8
else:
d0=1.0
return max(min_value, d0)
def calc_d0_array(L, pair_type):
# Convert L to a NumPy array if it isn't already one (enables flexibility in input types)
# fixed 01.03.2026: now returns 1.00 instead of 1.04 for minimum value
L = np.array(L, dtype=float)
L = np.maximum(26,L)
min_value=1.0
if pair_type=='nucleic_acid': min_value=2.0
# Calculate d0 using the vectorized operation
return np.maximum(min_value, 1.24 * (L - 15) ** (1.0/3.0) - 1.8)
# Define the parse_atom_line function for PDB lines (by column) and mmCIF lines (split by white_space)
# parsed_line = parse_atom_line(line)
# line = "ATOM 123 CA ALA A 15 11.111 22.222 33.333 1.00 20.00 C"
def parse_pdb_atom_line(line):
atom_num = line[6:11].strip()
atom_name = line[12:16].strip()
residue_name = line[17:20].strip()
if residue_name == "LIG": return None # ligands in Boltz PDB-format files
chain_id = line[21].strip()
residue_seq_num = line[22:26].strip()
x = line[30:38].strip()
y = line[38:46].strip()
z = line[46:54].strip()
# Convert string numbers to integers or floats as appropriate
atom_num = int(atom_num)
residue_seq_num = int(residue_seq_num)
x = float(x)
y = float(y)
z = float(z)
return {
'atom_num': atom_num,
'atom_name': atom_name,
'residue_name': residue_name,
'chain_id': chain_id,
'residue_seq_num': residue_seq_num,
'x': x,
'y': y,
'z': z
}
def parse_cif_atom_line(line,fielddict):
# for parsing AF3 and Boltz1/2 mmCIF files
# ligands do not have residue numbers but modified residues do. Return "None" for ligand.
# AF3 mmcif lines
# 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
#ATOM 1294 N N . ARG A 1 159 ? 5.141 -14.096 10.526 1.00 95.62 159 A 1
#ATOM 1295 C CA . ARG A 1 159 ? 4.186 -13.376 11.366 1.00 96.27 159 A 1
#ATOM 1296 C C . ARG A 1 159 ? 2.976 -14.235 11.697 1.00 96.42 159 A 1
#ATOM 1297 O O . ARG A 1 159 ? 2.654 -15.174 10.969 1.00 95.46 159 A 1
# ...
#HETATM 1305 N N . TPO A 1 160 ? 2.328 -13.853 12.742 1.00 96.42 160 A 1
#HETATM 1306 C CA . TPO A 1 160 ? 1.081 -14.560 13.218 1.00 96.78 160 A 1
#HETATM 1307 C C . TPO A 1 160 ? -2.115 -11.668 12.263 1.00 96.19 160 A 1
#HETATM 1308 O O . TPO A 1 160 ? -1.790 -11.556 11.113 1.00 95.75 160 A 1
# ...
#HETATM 2608 P PG . ATP C 3 . ? -6.858 4.182 10.275 1.00 84.94 1 C 1
#HETATM 2609 O O1G . ATP C 3 . ? -6.178 5.238 11.074 1.00 75.56 1 C 1
#HETATM 2610 O O2G . ATP C 3 . ? -5.889 3.166 9.748 1.00 75.15 1 C 1
# ...
#HETATM 2639 MG MG . MG D 4 . ? -7.262 2.709 4.825 1.00 91.47 1 D 1
#HETATM 2640 MG MG . MG E 5 . ? -4.994 2.251 8.755 1.00 85.96 1 E 1
# Boltz1 mmcif files (in non-standard order))
#_atom_site.group_PDB
#_atom_site.id
#_atom_site.type_symbol
#_atom_site.label_atom_id
#_atom_site.label_alt_id
#_atom_site.label_comp_id
#_atom_site.label_seq_id
#_atom_site.auth_seq_id
#_atom_site.pdbx_PDB_ins_code
#_atom_site.label_asym_id
#_atom_site.Cartn_x
#_atom_site.Cartn_y
#_atom_site.Cartn_z
#_atom_site.occupancy
#_atom_site.label_entity_id
#_atom_site.auth_asym_id
#_atom_site.auth_comp_id
#_atom_site.B_iso_or_equiv
#_atom_site.pdbx_PDB_model_num
# 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
#ATOM 2652 N N . ASN 43 43 ? B 10.83538 6.06359 18.45139 1 2 B ASN 1 1
#ATOM 2653 C CA . ASN 43 43 ? B 10.76295 5.07366 19.53232 1 2 B ASN 1 1
#ATOM 2654 C C . ASN 43 43 ? B 11.21770 5.64437 20.88774 1 2 B ASN 1 1
#ATOM 2655 O O . ASN 43 43 ? B 12.06730 6.51688 20.91168 1 2 B ASN 1 1
#ATOM 2656 C CB . ASN 43 43 ? B 11.60137 3.84778 19.19481 1 2 B ASN 1 1
#ATOM 2657 C CG . ASN 43 43 ? B 10.96208 3.03997 18.07013 1 2 B ASN 1 1
#ATOM 2658 O OD1 . ASN 43 43 ? B 9.79094 3.17033 17.81165 1 2 B ASN 1 1
#ATOM 2659 N ND2 . ASN 43 43 ? B 11.77101 2.23791 17.39764 1 2 B ASN 1 1
#HETATM 2660 P PG . ATP . 1 ? C -8.79525 6.04621 -4.99212 1 3 C ATP 1 1
#HETATM 2661 O O1G . ATP . 1 ? C -10.01901 6.83468 -5.24825 1 3 C ATP 1 1
#HETATM 2662 O O2G . ATP . 1 ? C -9.03047 4.56941 -4.85246 1 3 C ATP 1 1
#HETATM 2663 O O3G . ATP . 1 ? C -7.97335 6.60305 -3.86656 1 3 C ATP 1 1
#HETATM 2664 P PB . ATP . 1 ? C -6.63618 7.04315 -6.56073 1 3 C ATP 1 1
#HETATM 2665 O O1B . ATP . 1 ? C -7.04640 8.36577 -7.14326 1 3 C ATP 1 1
#HETATM 2666 O O2B . ATP . 1 ? C -5.79036 7.13926 -5.33995 1 3 C ATP 1 1
linelist = line.split()
atom_num = linelist[ fielddict['id'] ]
atom_name = linelist[ fielddict['label_atom_id'] ]
residue_name = linelist[ fielddict['label_comp_id'] ]
if "auth_asym_id" in fielddict:
chain_id = linelist[ fielddict['auth_asym_id'] ]
else:
chain_id = linelist[ fielddict['label_asym_id'] ]
residue_seq_num = linelist[ fielddict['label_seq_id'] ]
x = linelist[ fielddict['Cartn_x'] ]
y = linelist[ fielddict['Cartn_y'] ]
z = linelist[ fielddict['Cartn_z'] ]
if residue_seq_num == ".": return None # ligand atom
# Convert string numbers to integers or floats as appropriate
atom_num = int(atom_num)
residue_seq_num = int(residue_seq_num)
x = float(x)
y = float(y)
z = float(z)
return {
'atom_num': atom_num,
'atom_name': atom_name,
'residue_name': residue_name,
'chain_id': chain_id,
'residue_seq_num': residue_seq_num,
'x': x,
'y': y,
'z': z
}
# Function for printing out residue numbers in PyMOL scripts
def contiguous_ranges(numbers):
if not numbers: # Check if the set is empty
return
sorted_numbers = sorted(numbers) # Sort the numbers
start = sorted_numbers[0]
end = start
ranges = [] # List to store ranges
def format_range(start, end):
if start == end:
return f"{start}"
else:
return f"{start}-{end}"
for number in sorted_numbers[1:]:
if number == end + 1:
end = number
else:
ranges.append(format_range(start, end))
start = end = number
# Append the last range after the loop
ranges.append(format_range(start, end))
# Join all ranges with a plus sign and print the result
string='+'.join(ranges)
return(string)
# Initializes a nested dictionary with all values set to 0
def init_chainpairdict_zeros(chainlist):
return {chain1: {chain2: 0 for chain2 in chainlist if chain1 != chain2} for chain1 in chainlist}
# Initializes a nested dictionary with NumPy arrays of zeros of a specified size
def init_chainpairdict_npzeros(chainlist, arraysize):
return {chain1: {chain2: np.zeros(arraysize) for chain2 in chainlist if chain1 != chain2} for chain1 in chainlist}
# Initializes a nested dictionary with empty sets.
def init_chainpairdict_set(chainlist):
return {chain1: {chain2: set() for chain2 in chainlist if chain1 != chain2} for chain1 in chainlist}
def classify_chains(chains, residue_types):
nuc_residue_set = {"DA", "DC", "DT", "DG", "A", "C", "U", "G"}
chain_types = {}
# Get unique chains and iterate over them
_, first_idx = np.unique(chains, return_index=True)
unique_chains = chains[np.sort(first_idx)]
for chain in unique_chains:
# Find indices where the current chain is located
indices = np.where(chains == chain)[0]
# Get the residues for these indices
chain_residues = residue_types[indices]
# Count nucleic acid residues
nuc_count = sum(residue in nuc_residue_set for residue in chain_residues)
# Determine if the chain is a nucleic acid or protein
chain_types[chain] = 'nucleic_acid' if nuc_count > 0 else 'protein'
return chain_types
# Load residues from AlphaFold PDB or mmCIF file into lists; each residue is a dictionary
# Read PDB file to get CA coordinates, chainids, and residue numbers
# Convert to np arrays, and calculate distances
residues = []
cb_residues = []
chains = []
atomsitefield_num=0
atomsitefield_dict={} # contains order of atom_site fields in mmCIF files; handles any mmCIF field order
# For af3 and boltz: need mask to identify CA atom tokens in plddt vector and pae matrix;
# Skip ligand atom tokens and non-CA-atom tokens in PTMs (those not in residue_set)
token_mask=list()
residue_set= {"ALA", "ARG", "ASN", "ASP", "CYS",
"GLN", "GLU", "GLY", "HIS", "ILE",
"LEU", "LYS", "MET", "PHE", "PRO",
"SER", "THR", "TRP", "TYR", "VAL",
"DA", "DC", "DT", "DG", "A", "C", "U", "G"}
nuc_residue_set = {"DA", "DC", "DT", "DG", "A", "C", "U", "G"}
with open(pdb_path, 'r') as PDB:
for line in PDB:
if line.startswith("_atom_site."):
line=line.strip()
(atomsite,fieldname)=line.split(".")
atomsitefield_dict[fieldname]=atomsitefield_num
atomsitefield_num += 1
continue
if line.startswith("ATOM") or line.startswith("HETATM"):
if cif:
atom=parse_cif_atom_line(line, atomsitefield_dict)
else:
atom=parse_pdb_atom_line(line)
if atom is None: # ligand atom
token_mask.append(0)
continue
if atom['atom_name'] == "CA" or "C1" in atom['atom_name']:
token_mask.append(1)
residues.append({
'atom_num': atom['atom_num'],
'coor': np.array([atom['x'], atom['y'], atom['z']]),
'res': atom['residue_name'],
'chainid': atom['chain_id'],
'resnum': atom['residue_seq_num'],
'residue': f"{atom['residue_name']:3} {atom['chain_id']:3} {atom['residue_seq_num']:4}"
})
chains.append(atom['chain_id'])
if atom['atom_name'] == "CB" or "C3" in atom['atom_name'] or (atom['residue_name']=="GLY" and atom['atom_name']=="CA"):
cb_residues.append({
'atom_num': atom['atom_num'],
'coor': np.array([atom['x'], atom['y'], atom['z']]),
'res': atom['residue_name'],
'chainid': atom['chain_id'],
'resnum': atom['residue_seq_num'],
'residue': f"{atom['residue_name']:3} {atom['chain_id']:3} {atom['residue_seq_num']:4}"
})
# add nucleic acids and non-CA atoms in PTM residues to tokens (as 0), whether labeled as "HETATM" (af3) or as "ATOM" (boltz)
if atom['atom_name'] != "CA" and "C1" not in atom['atom_name'] and atom['residue_name'] not in residue_set:
token_mask.append(0)
# Convert structure information to numpy arrays
numres = len(residues)
CA_atom_num= np.array([res['atom_num']-1 for res in residues]) # for AF3 atom indexing from 0
CB_atom_num= np.array([res['atom_num']-1 for res in cb_residues]) # for AF3 atom indexing from 0
coordinates = np.array([res['coor'] for res in cb_residues])
chains = np.array(chains)
_, first_idx = np.unique(chains, return_index=True)
unique_chains = chains[np.sort(first_idx)]
token_array=np.array(token_mask)
ntokens=np.sum(token_array)
residue_types=np.array([res['res'] for res in residues])
# chain types (nucleic acid (NA) or protein) and chain_pair_types ('nucleic_acid' if either chain is NA) for d0 calculation
# arbitrarily setting d0 to 2.0 for NA/protein or NA/NA chain pairs (approximately 21 base pairs)
d0_nucleic_acid=2.0
chain_dict = classify_chains(chains, residue_types)
chain_pair_type = init_chainpairdict_zeros(unique_chains)
for chain1 in unique_chains:
for chain2 in unique_chains:
if chain1==chain2: continue
if chain_dict[chain1] == 'nucleic_acid' or chain_dict[chain2] == 'nucleic_acid':
chain_pair_type[chain1][chain2]='nucleic_acid'
else:
chain_pair_type[chain1][chain2]='protein'
# Calculate distance matrix using NumPy broadcasting
distances = np.sqrt(((coordinates[:, np.newaxis, :] - coordinates[np.newaxis, :, :])**2).sum(axis=2))
# Load AF2, AF3, or BOLTZ data and extract plddt and pae_matrix (and ptm_matrix if available)
if af2:
if os.path.exists(pae_file_path):
if pae_file_path.endswith('.pkl'):
data = np.load(pae_file_path, allow_pickle=True)
else:
with open(pae_file_path, 'r') as file:
data = json.load(file)
if 'iptm' in data: iptm_af2 = float(data['iptm'])
else: iptm_af2=-1.0
if 'ptm' in data: ptm_af2 = float(data['ptm'])
else: ptm_af2=-1.0
if 'plddt' in data:
plddt = np.array(data['plddt'])
cb_plddt = np.array(data['plddt']) # for pDockQ
else:
plddt = np.zeros(numres)
cb_plddt = np.zeros(numres)
if 'pae' in data:
pae_matrix = np.array(data['pae'])
elif 'predicted_aligned_error' in data:
pae_matrix=np.array(data['predicted_aligned_error'])
else:
print("AF2 PAE file does not exist: ", pae_file_path)
sys.exit()
if boltz:
# Boltz filenames:
# AURKA_TPX2_model_0.cif
# confidence_AURKA_TPX2_model_0.json
# pae_AURKA_TPX2_model_0.npz
# plddt_AURKA_TPX2_model_0.npz
plddt_file_path=pae_file_path.replace("pae","plddt")
if os.path.exists(plddt_file_path):
data_plddt = np.load(plddt_file_path)
raw_plddt = data_plddt['plddt']
# Only multiply by 100 if the max value is <= 1.0 (meaning it's normalized)
if np.max(raw_plddt) <= 1.0:
plddt_boltz = np.array(100.0 * raw_plddt)
else:
plddt_boltz = np.array(raw_plddt)
plddt = plddt_boltz[np.ix_(token_array.astype(bool))]
cb_plddt = plddt_boltz[np.ix_(token_array.astype(bool))]
else:
plddt = np.zeros(ntokens)
cb_plddt = np.zeros(ntokens)
if os.path.exists(pae_file_path):
data_pae = np.load(pae_file_path)
pae_matrix_boltz=np.array(data_pae['pae'])
pae_matrix = pae_matrix_boltz[np.ix_(token_array.astype(bool), token_array.astype(bool))]
else:
print("Boltz PAE file does not exist: ", pae_file_path)
sys.exit()
summary_file_path=pae_file_path.replace("pae","confidence")
summary_file_path=summary_file_path.replace(".npz",".json")
iptm_boltz= {chain1: {chain2: 0 for chain2 in unique_chains if chain1 != chain2} for chain1 in unique_chains}
if os.path.exists(summary_file_path):
with open(summary_file_path, 'r') as file:
data_summary = json.load(file)
if 'pair_chains_iptm' in data_summary:
boltz_chain_pair_iptm_data=data_summary['pair_chains_iptm']
else:
# Boltz missing key fallback
print(f"Warning: 'pair_chains_iptm' key not found in {summary_file_path}. ipTM scores will be 0.")
boltz_chain_pair_iptm_data = {}
boltz_chain_pair_iptm_data=data_summary['pair_chains_iptm']
for nchain1, chain1 in enumerate(unique_chains):
for nchain2, chain2 in enumerate(unique_chains):
if chain1 == chain2: continue
iptm_boltz[chain1][chain2]=boltz_chain_pair_iptm_data[str(nchain1)][str(nchain2)]
else:
print("Boltz summary file does not exist: ", summary_file_path)
if af3:
# Example Alphafold3 server filenames
# fold_aurka_0_tpx2_0_full_data_0.json
# fold_aurka_0_tpx2_0_summary_confidences_0.json
# fold_aurka_0_tpx2_0_model_0.cif
# Example AlphaFold3 downloadable code filenames
# confidences.json
# summary_confidences.json
# model1.cif
if os.path.exists(pae_file_path):
with open(pae_file_path, 'r') as file:
data = json.load(file)
else:
print("AF3 PAE file does not exist: ", pae_file_path)
sys.exit()
if "atom_plddts" in data:
atom_plddts=np.array(data['atom_plddts'])
plddt=atom_plddts[CA_atom_num] # pull out residue plddts from Calpha atoms
cb_plddt=atom_plddts[CB_atom_num] # pull out residue plddts from Cbeta atoms for pDockQ
else:
plddt = np.zeros(numres)
cb_plddt = np.zeros(numres)
# Get pairwise residue PAE matrix by identifying one token per protein residue.
# Modified residues have separate tokens for each atom, so need to pull out Calpha atom as token
# Skip ligands
if 'pae' in data:
pae_matrix_af3 = np.array(data['pae'])
else:
print("no PAE data in AF3 json file; quitting")
sys.exit()
# Set pae_matrix for AF3 from subset of full PAE matrix from json file
token_array=np.array(token_mask)
pae_matrix = pae_matrix_af3[np.ix_(token_array.astype(bool), token_array.astype(bool))]
# Get iptm matrix from AF3 summary_confidences file
iptm_af3= {chain1: {chain2: 0 for chain2 in unique_chains if chain1 != chain2} for chain1 in unique_chains}
summary_file_path = None
if "confidences" in pae_file_path:
summary_file_path = pae_file_path.replace("confidences", "summary_confidences")
elif "full_data" in pae_file_path:
summary_file_path = pae_file_path.replace("full_data", "summary_confidences")
if summary_file_path is not None and os.path.exists(summary_file_path):
with open(summary_file_path,'r') as file:
data_summary=json.load(file)
af3_chain_pair_iptm_data=data_summary['chain_pair_iptm']
for nchain1, chain1 in enumerate(unique_chains):
for nchain2, chain2 in enumerate(unique_chains):
if chain1 == chain2: continue
iptm_af3[chain1][chain2]=af3_chain_pair_iptm_data[nchain1][nchain2]
else:
print("AF3 summary file does not exist: ", summary_file_path)
# Compute chain-pair-specific interchain PTM and PAE, count valid pairs, and count unique residues
# First, create dictionaries of appropriate size: top keys are chain1 and chain2 where chain1 != chain2
# Nomenclature:
# iptm_d0chn = calculate iptm from PAEs with no PAE cutoff; d0 = numres in chain pair = len(chain1) + len(chain2)
# ipsae_d0chn = calculate ipsae from PAEs with PAE cutoff; d0 = numres in chain pair = len(chain1) + len(chain2)
# ipsae_d0dom = calculate ipsae from PAEs with PAE cutoff; d0 from number of residues in chain1 and chain2 that have interchain PAE<cutoff
# ipsae_d0res = calculate ipsae from PAEs with PAE cutoff; d0 from number of residues in chain2 that have interchain PAE<cutoff given residue in chain1
#
# for each chain_pair iptm/ipsae, there is (for example)
# ipsae_d0res_byres = by-residue array;
# ipsae_d0res_asym = asymmetric pair value (A->B is different from B->A)
# ipsae_d0res_max = maximum of A->B and B->A value
# ipsae_d0res_asymres = identify of residue that provides each asym maximum
# ipsae_d0res_maxres = identify of residue that provides each maximum over both chains
#
# n0num = number of residues in whole complex provided by AF2 model
# n0chn = number of residues in chain pair = len(chain1) + len(chain2)
# n0dom = number of residues in chain pair that have good PAE values (<cutoff)
# n0res = number of residues in chain2 that have good PAE residues for each residue of chain1
iptm_d0chn_byres = init_chainpairdict_npzeros(unique_chains, numres)
ipsae_d0chn_byres = init_chainpairdict_npzeros(unique_chains, numres)
ipsae_d0dom_byres = init_chainpairdict_npzeros(unique_chains, numres)
ipsae_d0res_byres = init_chainpairdict_npzeros(unique_chains, numres)
iptm_d0chn_asym = init_chainpairdict_zeros(unique_chains)
ipsae_d0chn_asym = init_chainpairdict_zeros(unique_chains)
ipsae_d0dom_asym = init_chainpairdict_zeros(unique_chains)
ipsae_d0res_asym = init_chainpairdict_zeros(unique_chains)
iptm_d0chn_max = init_chainpairdict_zeros(unique_chains)
ipsae_d0chn_max = init_chainpairdict_zeros(unique_chains)
ipsae_d0dom_max = init_chainpairdict_zeros(unique_chains)
ipsae_d0res_max = init_chainpairdict_zeros(unique_chains)
iptm_d0chn_asymres = init_chainpairdict_zeros(unique_chains)
ipsae_d0chn_asymres = init_chainpairdict_zeros(unique_chains)
ipsae_d0dom_asymres = init_chainpairdict_zeros(unique_chains)
ipsae_d0res_asymres = init_chainpairdict_zeros(unique_chains)
iptm_d0chn_maxres = init_chainpairdict_zeros(unique_chains)
ipsae_d0chn_maxres = init_chainpairdict_zeros(unique_chains)
ipsae_d0dom_maxres = init_chainpairdict_zeros(unique_chains)
ipsae_d0res_maxres = init_chainpairdict_zeros(unique_chains)
n0chn = init_chainpairdict_zeros(unique_chains)
n0dom = init_chainpairdict_zeros(unique_chains)
n0dom_max = init_chainpairdict_zeros(unique_chains)
n0res = init_chainpairdict_zeros(unique_chains)
n0res_max = init_chainpairdict_zeros(unique_chains)
n0res_byres = init_chainpairdict_npzeros(unique_chains, numres)
d0chn = init_chainpairdict_zeros(unique_chains)
d0dom = init_chainpairdict_zeros(unique_chains)
d0dom_max = init_chainpairdict_zeros(unique_chains)
d0res = init_chainpairdict_zeros(unique_chains)
d0res_max = init_chainpairdict_zeros(unique_chains)
d0res_byres = init_chainpairdict_npzeros(unique_chains, numres)
valid_pair_counts = init_chainpairdict_zeros(unique_chains)
dist_valid_pair_counts = init_chainpairdict_zeros(unique_chains)
unique_residues_chain1 = init_chainpairdict_set(unique_chains)
unique_residues_chain2 = init_chainpairdict_set(unique_chains)
dist_unique_residues_chain1 = init_chainpairdict_set(unique_chains)
dist_unique_residues_chain2 = init_chainpairdict_set(unique_chains)
pDockQ_unique_residues = init_chainpairdict_set(unique_chains)
pDockQ = init_chainpairdict_zeros(unique_chains)
pDockQ2 = init_chainpairdict_zeros(unique_chains)
LIS = init_chainpairdict_zeros(unique_chains)
# pDockQ
pDockQ_cutoff=8.0
for chain1 in unique_chains:
for chain2 in unique_chains:
if chain1 == chain2: continue
npairs=0
for i in range(numres):
if chains[i] != chain1: continue
valid_pairs = (chains==chain2) & (distances[i] <= pDockQ_cutoff)
npairs += np.sum(valid_pairs)
if valid_pairs.any():
pDockQ_unique_residues[chain1][chain2].add(i)
chain2residues=np.where(valid_pairs)[0]
for residue in chain2residues:
pDockQ_unique_residues[chain1][chain2].add(residue)
if npairs>0:
nres=len(list(pDockQ_unique_residues[chain1][chain2]))
mean_plddt= cb_plddt[ list(pDockQ_unique_residues[chain1][chain2])].mean()
x=mean_plddt*math.log10(npairs)
pDockQ[chain1][chain2]= 0.724 / (1 + math.exp(-0.052*(x-152.611)))+0.018
else:
mean_plddt=0.0
x=0.0
pDockQ[chain1][chain2]=0.0
nres=0
# pDockQ2
for chain1 in unique_chains:
for chain2 in unique_chains:
if chain1 == chain2:
continue
npairs=0
sum=0.0
for i in range(numres):
if chains[i] != chain1:
continue
valid_pairs = (chains==chain2) & (distances[i] <= pDockQ_cutoff)
if valid_pairs.any():
npairs += np.sum(valid_pairs)
pae_list=pae_matrix[i][valid_pairs]
pae_list_ptm=ptm_func_vec(pae_list,10.0)
sum += pae_list_ptm.sum()
if npairs>0:
nres=len(list(pDockQ_unique_residues[chain1][chain2]))
mean_plddt= cb_plddt[ list(pDockQ_unique_residues[chain1][chain2])].mean()
mean_ptm = sum/npairs
x=mean_plddt*mean_ptm
pDockQ2[chain1][chain2]= 1.31 / (1 + math.exp(-0.075*(x-84.733)))+0.005
else:
mean_plddt=0.0
x=0.0
nres=0
pDockQ2[chain1][chain2]=0.0
# LIS
for chain1 in unique_chains:
for chain2 in unique_chains:
if chain1==chain2: continue
mask = (chains[:, None] == chain1) & (chains[None, :] == chain2) # Select residues for (chain1, chain2)
selected_pae = pae_matrix[mask] # Get PAE values for this pair
if selected_pae.size > 0: # Ensure we have values
valid_pae = selected_pae[selected_pae < 12] # Apply the threshold
if valid_pae.size > 0:
scores = (12 - valid_pae) / 12 # Compute scores
avg_score = np.mean(scores) # Average score for (chain1, chain2)
LIS[chain1][chain2] = avg_score
else:
LIS[chain1][chain2] = 0.0 # No valid values
else:
LIS[chain1][chain2]=0.0
# calculate ipTM/ipSAE with and without PAE cutoff
for chain1 in unique_chains:
for chain2 in unique_chains:
if chain1 == chain2:
continue
n0chn[chain1][chain2]=np.sum( chains==chain1) + np.sum(chains==chain2) # total number of residues in chain1 and chain2
d0chn[chain1][chain2]=calc_d0(n0chn[chain1][chain2], chain_pair_type[chain1][chain2])
ptm_matrix_d0chn=np.zeros((numres,numres))
ptm_matrix_d0chn=ptm_func_vec(pae_matrix,d0chn[chain1][chain2])
valid_pairs_iptm = (chains == chain2)
valid_pairs_matrix = np.outer(chains == chain1, chains == chain2) & (pae_matrix < pae_cutoff)
for i in range(numres):
if chains[i] != chain1:
continue
valid_pairs_ipsae = valid_pairs_matrix[i] # row for residue i of chain1
iptm_d0chn_byres[chain1][chain2][i] = ptm_matrix_d0chn[i, valid_pairs_iptm].mean() if valid_pairs_iptm.any() else 0.0
ipsae_d0chn_byres[chain1][chain2][i] = ptm_matrix_d0chn[i, valid_pairs_ipsae].mean() if valid_pairs_ipsae.any() else 0.0
# Track unique residues contributing to the IPSAE for chain1,chain2
valid_pair_counts[chain1][chain2] += np.sum(valid_pairs_ipsae)
if valid_pairs_ipsae.any():
iresnum=residues[i]['resnum']
unique_residues_chain1[chain1][chain2].add(iresnum)
for j in np.where(valid_pairs_ipsae)[0]:
jresnum=residues[j]['resnum']
unique_residues_chain2[chain1][chain2].add(jresnum)
# Track unique residues contributing to iptm in interface
valid_pairs = (chains == chain2) & (pae_matrix[i] < pae_cutoff) & (distances[i] < dist_cutoff)
dist_valid_pair_counts[chain1][chain2] += np.sum(valid_pairs)
# Track unique residues contributing to the IPTM
if valid_pairs.any():
iresnum=residues[i]['resnum']
dist_unique_residues_chain1[chain1][chain2].add(iresnum)
for j in np.where(valid_pairs)[0]:
jresnum=residues[j]['resnum']
dist_unique_residues_chain2[chain1][chain2].add(jresnum)
OUT2.write("i AlignChn ScoredChain AlignResNum AlignResType AlignRespLDDT n0chn n0dom n0res d0chn d0dom d0res ipTM_pae ipSAE_d0chn ipSAE_d0dom ipSAE \n")
for chain1 in unique_chains:
for chain2 in unique_chains:
if chain1 == chain2:
continue
residues_1 = len(unique_residues_chain1[chain1][chain2])
residues_2 = len(unique_residues_chain2[chain1][chain2])
n0dom[chain1][chain2] = residues_1+residues_2
d0dom[chain1][chain2] = calc_d0(n0dom[chain1][chain2], chain_pair_type[chain1][chain2])
ptm_matrix_d0dom = np.zeros((numres,numres))
ptm_matrix_d0dom = ptm_func_vec(pae_matrix,d0dom[chain1][chain2])
valid_pairs_matrix = np.outer(chains == chain1, chains == chain2) & (pae_matrix < pae_cutoff)
# Assuming valid_pairs_matrix is already defined
n0res_byres_all = np.sum(valid_pairs_matrix, axis=1)
d0res_byres_all = calc_d0_array(n0res_byres_all, chain_pair_type[chain1][chain2])
n0res_byres[chain1][chain2] = n0res_byres_all
d0res_byres[chain1][chain2] = d0res_byres_all
for i in range(numres):
if chains[i] != chain1:
continue
valid_pairs = valid_pairs_matrix[i]
ipsae_d0dom_byres[chain1][chain2][i] = ptm_matrix_d0dom[i, valid_pairs].mean() if valid_pairs.any() else 0.0
ptm_row_d0res=np.zeros((numres))
ptm_row_d0res=ptm_func_vec(pae_matrix[i], d0res_byres[chain1][chain2][i])
ipsae_d0res_byres[chain1][chain2][i] = ptm_row_d0res[valid_pairs].mean() if valid_pairs.any() else 0.0
outstring = f'{i+1:<4d} ' + (
f'{chain1:4} '
f'{chain2:4} '
f'{residues[i]["resnum"]:4d} '
f'{residues[i]["res"]:3} '
f'{plddt[i]:8.2f} '
f'{int(n0chn[chain1][chain2]):5d} '
f'{int(n0dom[chain1][chain2]):5d} '
f'{int(n0res_byres[chain1][chain2][i]):5d} '
f'{d0chn[chain1][chain2]:8.3f} '
f'{d0dom[chain1][chain2]:8.3f} '
f'{d0res_byres[chain1][chain2][i]:8.3f} '
f'{iptm_d0chn_byres[chain1][chain2][i]:8.4f} '
f'{ipsae_d0chn_byres[chain1][chain2][i]:8.4f} '
f'{ipsae_d0dom_byres[chain1][chain2][i]:8.4f} '
f'{ipsae_d0res_byres[chain1][chain2][i]:8.4f}\n'
)
OUT2.write(outstring)
# Compute interchain ipTM and ipSAE for each chain pair
for chain1 in unique_chains:
for chain2 in unique_chains:
if chain1 == chain2:
continue
interchain_values = iptm_d0chn_byres[chain1][chain2]
max_index = np.argmax(interchain_values)
iptm_d0chn_asym[chain1][chain2] = interchain_values[max_index]
iptm_d0chn_asymres[chain1][chain2] = residues[max_index]['residue'] if max_index is not None else "None"
interchain_values = ipsae_d0chn_byres[chain1][chain2]
max_index = np.argmax(interchain_values)
ipsae_d0chn_asym[chain1][chain2] = interchain_values[max_index]
ipsae_d0chn_asymres[chain1][chain2] = residues[max_index]['residue'] if max_index is not None else "None"
interchain_values = ipsae_d0dom_byres[chain1][chain2]
max_index = np.argmax(interchain_values)
ipsae_d0dom_asym[chain1][chain2] = interchain_values[max_index]
ipsae_d0dom_asymres[chain1][chain2] = residues[max_index]['residue'] if max_index is not None else "None"
interchain_values = ipsae_d0res_byres[chain1][chain2]
max_index = np.argmax(interchain_values)
ipsae_d0res_asym[chain1][chain2] = interchain_values[max_index]
ipsae_d0res_asymres[chain1][chain2] = residues[max_index]['residue'] if max_index is not None else "None"
n0res[chain1][chain2]=n0res_byres[chain1][chain2][max_index]
d0res[chain1][chain2]=d0res_byres[chain1][chain2][max_index]
# pick maximum value for each chain pair for each iptm/ipsae type
if chain1 > chain2:
maxvalue=max(iptm_d0chn_asym[chain1][chain2], iptm_d0chn_asym[chain2][chain1])
if maxvalue==iptm_d0chn_asym[chain1][chain2]: maxres=iptm_d0chn_asymres[chain1][chain2]
else: maxres=iptm_d0chn_asymres[chain2][chain1]
iptm_d0chn_max[chain1][chain2]=maxvalue
iptm_d0chn_maxres[chain1][chain2]=maxres
iptm_d0chn_max[chain2][chain1]=maxvalue
iptm_d0chn_maxres[chain2][chain1]=maxres
maxvalue=max(ipsae_d0chn_asym[chain1][chain2], ipsae_d0chn_asym[chain2][chain1])
if maxvalue==ipsae_d0chn_asym[chain1][chain2]: maxres=ipsae_d0chn_asymres[chain1][chain2]
else: maxres=ipsae_d0chn_asymres[chain2][chain1]
ipsae_d0chn_max[chain1][chain2]=maxvalue
ipsae_d0chn_maxres[chain1][chain2]=maxres
ipsae_d0chn_max[chain2][chain1]=maxvalue
ipsae_d0chn_maxres[chain2][chain1]=maxres
maxvalue=max(ipsae_d0dom_asym[chain1][chain2], ipsae_d0dom_asym[chain2][chain1])
if maxvalue==ipsae_d0dom_asym[chain1][chain2]:
maxres=ipsae_d0dom_asymres[chain1][chain2]
maxn0=n0dom[chain1][chain2]
maxd0=d0dom[chain1][chain2]
else:
maxres=ipsae_d0dom_asymres[chain2][chain1]
maxn0=n0dom[chain2][chain1]
maxd0=d0dom[chain2][chain1]
ipsae_d0dom_max[chain1][chain2]=maxvalue
ipsae_d0dom_maxres[chain1][chain2]=maxres
ipsae_d0dom_max[chain2][chain1]=maxvalue
ipsae_d0dom_maxres[chain2][chain1]=maxres
n0dom_max[chain1][chain2]=maxn0
n0dom_max[chain2][chain1]=maxn0
d0dom_max[chain1][chain2]=maxd0
d0dom_max[chain2][chain1]=maxd0
maxvalue=max(ipsae_d0res_asym[chain1][chain2], ipsae_d0res_asym[chain2][chain1])
if maxvalue==ipsae_d0res_asym[chain1][chain2]:
maxres=ipsae_d0res_asymres[chain1][chain2]
maxn0=n0res[chain1][chain2]
maxd0=d0res[chain1][chain2]
else:
maxres=ipsae_d0res_asymres[chain2][chain1]
maxn0=n0res[chain2][chain1]
maxd0=d0res[chain2][chain1]
ipsae_d0res_max[chain1][chain2]=maxvalue
ipsae_d0res_maxres[chain1][chain2]=maxres
ipsae_d0res_max[chain2][chain1]=maxvalue
ipsae_d0res_maxres[chain2][chain1]=maxres
n0res_max[chain1][chain2]=maxn0
n0res_max[chain2][chain1]=maxn0
d0res_max[chain1][chain2]=maxd0
d0res_max[chain2][chain1]=maxd0
chaincolor={'A':'magenta', 'B':'marine', 'C':'lime', 'D':'orange',
'E':'yellow', 'F':'cyan', 'G':'lightorange', 'H':'pink',
'I':'deepteal', 'J':'forest', 'K':'lightblue', 'L':'slate',
'M':'violet', 'N':'arsenic', 'O':'iodine', 'P':'silver',
'Q':'red', 'R':'sulfur', 'S':'purple', 'T':'olive',
'U':'palegreen', 'V':'green', 'W':'blue', 'X':'palecyan',
'Y':'limon', 'Z':'chocolate'}
chainpairs=set()
for chain1 in unique_chains:
for chain2 in unique_chains:
if chain1 >= chain2: continue
chainpairs.add(chain1 + "-" + chain2)
OUT.write("\nChn1 Chn2 PAE Dist Type ipSAE ipSAE_d0chn ipSAE_d0dom ipTM_af ipTM_d0chn pDockQ pDockQ2 LIS n0res n0chn n0dom d0res d0chn d0dom nres1 nres2 dist1 dist2 Model\n")
PML.write("# Chn1 Chn2 PAE Dist Type ipSAE ipSAE_d0chn ipSAE_d0dom ipTM_af ipTM_d0chn pDockQ pDockQ2 LIS n0res n0chn n0dom d0res d0chn d0dom nres1 nres2 dist1 dist2 Model\n")
for pair in sorted(chainpairs):
(chain_a, chain_b) = pair.split("-")
pair1 = (chain_a, chain_b)
pair2 = (chain_b, chain_a)
for pair in (pair1, pair2):
chain1=pair[0]
chain2=pair[1]
if chain1 in chaincolor:
color1=chaincolor[chain1]
else:
color1='magenta'
if chain2 in chaincolor:
color2=chaincolor[chain2]
else:
color2='marine'
residues_1 = len(unique_residues_chain1[chain1][chain2])
residues_2 = len(unique_residues_chain2[chain1][chain2])
dist_residues_1 = len(dist_unique_residues_chain1[chain1][chain2])
dist_residues_2 = len(dist_unique_residues_chain2[chain1][chain2])
pairs = valid_pair_counts[chain1][chain2]
dist_pairs = dist_valid_pair_counts[chain1][chain2]
if af2: iptm_af = iptm_af2 # same for all chain pairs in entry
if af3: iptm_af = iptm_af3[chain1][chain2] # symmetric value for each chain pair
if boltz: iptm_af=iptm_boltz[chain1][chain2]
outstring=f'{chain1} {chain2} {pae_string:3} {dist_string:3} {"asym":5} ' + (
f'{ipsae_d0res_asym[chain1][chain2]:8.6f} '
f'{ipsae_d0chn_asym[chain1][chain2]:8.6f} '
f'{ipsae_d0dom_asym[chain1][chain2]:8.6f} '
f'{iptm_af:5.3f} '
f'{iptm_d0chn_asym[chain1][chain2]:8.6f} '
f'{pDockQ[chain1][chain2]:8.4f} '
f'{pDockQ2[chain1][chain2]:8.4f} '
f'{LIS[chain1][chain2]:8.4f} '
f'{int(n0res[chain1][chain2]):5d} '
f'{int(n0chn[chain1][chain2]):5d} '
f'{int(n0dom[chain1][chain2]):5d} '
f'{d0res[chain1][chain2]:6.2f} '
f'{d0chn[chain1][chain2]:6.2f} '
f'{d0dom[chain1][chain2]:6.2f} '
f'{residues_1:5d} '
f'{residues_2:5d} '
f'{dist_residues_1:5d} '
f'{dist_residues_2:5d} '
f'{pdb_stem}\n')
OUT.write(outstring)
PML.write("# " + outstring)
if chain1 > chain2:
residues_1 = max(len(unique_residues_chain2[chain1][chain2]), len(unique_residues_chain1[chain2][chain1]))
residues_2 = max(len(unique_residues_chain1[chain1][chain2]), len(unique_residues_chain2[chain2][chain1]))
dist_residues_1 = max(len(dist_unique_residues_chain2[chain1][chain2]), len(dist_unique_residues_chain1[chain2][chain1]))
dist_residues_2 = max(len(dist_unique_residues_chain1[chain1][chain2]), len(dist_unique_residues_chain2[chain2][chain1]))
iptm_af_value=iptm_af
pDockQ2_value=max(pDockQ2[chain1][chain2], pDockQ2[chain2][chain1])
if boltz:
iptm_af_value=max(iptm_boltz[chain1][chain2], iptm_boltz[chain2][chain1])
LIS_Score=(LIS[chain1][chain2]+LIS[chain2][chain1])/2.0
outstring=f'{chain2} {chain1} {pae_string:3} {dist_string:3} {"max":5} ' + (
f'{ipsae_d0res_max[chain1][chain2]:8.6f} '
f'{ipsae_d0chn_max[chain1][chain2]:8.6f} '
f'{ipsae_d0dom_max[chain1][chain2]:8.6f} '
f'{iptm_af_value:5.3f} '
f'{iptm_d0chn_max[chain1][chain2]:8.6f} '
f'{pDockQ[chain1][chain2]:8.4f} '
f'{pDockQ2_value:8.4f} '
f'{LIS_Score:8.4f} '
f'{int(n0res_max[chain1][chain2]):5d} '
f'{int(n0chn[chain1][chain2]):5d} '
f'{int(n0dom_max[chain1][chain2]):5d} '
f'{d0res_max[chain1][chain2]:6.2f} '
f'{d0chn[chain1][chain2]:6.2f} '
f'{d0dom_max[chain1][chain2]:6.2f} '
f'{residues_1:5d} '
f'{residues_2:5d} '
f'{dist_residues_1:5d} '