# Download AlphaFold models for an organism from the UniProt web site.
# Here, "Methanosarcina mazei Go1" is used as an example.

# Open the link below in your web browser.
https://www.uniprot.org/uniprotkb?query=Methanosarcina+mazei+Go1

# Go to the Download tab, select LIST format and copy-paste the result to METMA.list
# > Download > LIST format --> copy to METMA.list

# IN LINUX, create a shell script to download the models from the EBI site
cd your_project_directory
perl -pe 's/(\w+)/wget https:\/\/alphafold.ebi.ac.uk\/files\/AF\-$1\-F1-model_v4.pdb/' < METMA.list > download.csh
chmod +x download.csh
./download.csh
# Example yielded 3297 / 3318 models successfully downloaded

# copy-paste scripts imp.pl and rename_compnd.pl from the end of this file
cat > imp.pl # first script from STDIN
cat > rename_compnd.pl # second script from STDIN
# use a text editor to customize $DALI_BIN, $BASEDIR and $DALIDATDIR in the scripts.

# import to Dali
mkdir DAT
ls *pdb > pdb.list
perl imp.pl < pdb.list > imp.csh
chmod +x imp.csh
./imp.csh
# Example created 3294 DAT files

# check that no model has >200 SSEs
head -1 DAT/*.dat| grep '>>>>' | sort -nk 4 | tail
# If the number in column 4 is >200, then you must manually split the structure into multiple chains or multiple entries
# (DaliLite crashes ungracefully if nSSE is greater than 200.)
# Our example was clean.

# We modify the COMPND records so that DaliLite summary reports both DaliLite identifier and AlphaFold Database identifier.
# map internal Dali identifiers to UniProt accessions
perl -pe 's/^.*AF\-//' imp.csh | perl -pe 's/\-F1-model_v4.pdb --pdbid//' | perl -pe 's/\-\-dat.*$//' > mapping
# add accession number to compnd description
perl rename_compnd.pl < mapping
# create search list for Dali
ls DAT | perl -pe 's/\.dat//' > METMA_db.list
# pairwise Dali comparison using the newly created search list and a structure from the PDB as query
~/DaliLite.v5/bin/dali.pl --cd1 1dj7B --db METMA_db.list --dat1 /data/DaliLite/DAT/ --dat2 ./DAT/ --np 40 --clean >log 2> err

imp.pl:
=======
# in:  PDB file names, one per line, on STDIN (e.g. pdb.list)
# out: one "import.pl" command line per input file, on STDOUT
#
# Every input file is assigned a four-character identifier consisting of
# a letter (a-h) followed by a three-digit counter: a000, a001, ... h999.
use strict;
use warnings;

my $DALI_BIN = "/home/luholm/DaliLite.v5/bin";
my $BASEDIR  = '/data/liisa/alphafold/FTR/';

my @letters  = split(//, 'abcdefgh');
my $capacity = @letters * 1000;    # number of distinct identifiers available
my $count    = 0;                  # identifiers handed out so far

while (my $x = <STDIN>) {
    chomp $x;
    next if $x =~ /^\s*$/;         # ignore blank lines in the file list

    # Check before generating the identifier, so that the last
    # identifier (h999) is still printed instead of being lost.
    die "Overflow\n" if $count >= $capacity;

    my $pdbfile = $BASEDIR . $x;
    my $cd = $letters[ int($count / 1000) ] . sprintf('%03d', $count % 1000);
    $count++;

    my $cmd = "$DALI_BIN/import.pl --pdbfile $pdbfile --pdbid $cd --dat ./DAT";
    print "$cmd\n";
}

rename_compnd.pl:
=================
# in: mapping (acc dali-id), one pair per line, on STDIN
# out: MOLECULE: of compnd replaced by acc
#
# For each pair, the file <dali-id>A.dat in $DALIDATDIR is rewritten in
# place. Pairs whose DAT file does not exist are skipped with a warning,
# so that no empty DAT file is created for them.
use strict;
use warnings;

my $DALIDATDIR = "./DAT/";

while (my $line = <STDIN>) {
    my ($acc, $cd) = split ' ', $line;
    next unless defined $cd;       # blank or incomplete mapping line

    my $datfile = "$DALIDATDIR/${cd}A.dat";
    unless (-e $datfile) {
        warn "Skipping $acc: $datfile not found\n";
        next;
    }

    # Read the whole file first; it is overwritten below.
    open(my $in, '<', $datfile) or die "Cannot read $datfile: $!\n";
    my @lines = <$in>;
    close($in);

    open(my $out, '>', $datfile) or die "Cannot write $datfile: $!\n";
    foreach my $record (@lines) {
        $record =~ s/MOLECULE:/AF-${acc}-F1-model_v4:/;
        print {$out} $record;
    }
    close($out) or die "Cannot close $datfile: $!\n";
}