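# Download AlphaFold models for an organism and import them into DaliLite. The accession list comes from the UniProt web site and the models from the EBI AlphaFold site. Here, "Methanosarcina mazei Go1" is used as an example.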

# Open the link below in your web browser.
https://www.uniprot.org/uniprotkb?query=Methanosarcina+mazei+Go1
# Go to the Download tab, select the LIST format (one accession per line) and copy-paste the result into the file METMA.list:
#   Download > LIST format --> copy to METMA.list

# IN LINUX, create a shell script to download the models from the EBI site
cd your_project_directory
perl -pe 's/(\w+)/wget https:\/\/alphafold.ebi.ac.uk\/files\/AF\-$1\-F1-model_v4.pdb/' < METMA.list > download.csh
chmod +x download.csh
./download.csh

# Example yielded 3297 / 3318 models successfully downloaded

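# Copy-paste the scripts imp.pl and rename_compnd.pl from the end of this file (the lines below each "====" underline).
cat > imp.pl # paste the first script from STDIN, then press Ctrl-D
cat > rename_compnd.pl # paste the second script from STDIN, then press Ctrl-D
# Use a text editor to customize $DALI_BIN and $BASEDIR in imp.pl and $DALIDATDIR in rename_compnd.pl.
# $BASEDIR must end with "/" because imp.pl appends the file name directly to it.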

# import to Dali
mkdir DAT
ls *pdb > pdb.list
perl imp.pl < pdb.list > imp.csh
chmod +x imp.csh
./imp.csh

# Example created 3294  DAT files

# check that no model has >200 SSEs
head -1 DAT/*.dat| grep '>>>>' | sort -nk 4 | tail
# If the number in column 4 is >200, then you must manually split the structure into multiple chains or multiple entries
# (DaliLite crashes ungracefully if nSSE is greater than 200.)
# Our example was clean.

# We modify the COMPND records so that the DaliLite summary reports both the DaliLite identifier and the AlphaFold Database identifier.
# map internal Dali identifiers to Uniprot accessions
perl -pe 's/^.*AF\-//' imp.csh | perl -pe 's/\-F1-model_v4.pdb --pdbid//' | perl -pe 's/\-\-dat.*$//' > mapping

# add accession number to compnd description
perl rename_compnd.pl < mapping

# create search list for Dali
ls DAT | perl -pe 's/\.dat//' > METMA_db.list

# pairwise Dali comparison using the newly created search list and a structure from the PDB as query
~/DaliLite.v5/bin/dali.pl --cd1 1dj7B --db METMA_db.list --dat1 /data/DaliLite/DAT/ --dat2 ./DAT/ --np 40 --clean >log 2> err


imp.pl:
=======
# imp.pl - turn a list of PDB file names into DaliLite import commands.
#
# in:  PDB file names on STDIN, one per line (e.g. the output of "ls *pdb")
# out: one "import.pl" command per file on STDOUT; each file is given a unique
#      four-character identifier via --pdbid
#
# Identifiers run a000, a001, ..., a999, b000, ..., h999 (8000 in total).
# The script dies with "Overflow" when more files than that are listed.

use strict;
use warnings;

my $DALI_BIN="/home/luholm/DaliLite.v5/bin";
my $BASEDIR='/data/liisa/alphafold/FTR/';       # must end with "/": the file name is appended as-is

my $count = 0;                                  # number of identifiers handed out so far
while (my $line = <STDIN>) {
        chomp $line;
        next if $line =~ /^\s*$/;               # a blank line would otherwise "import" $BASEDIR itself
        my $pdbfile = $BASEDIR . $line;
        my $cd = dali_id($count++);             # dies before anything is printed for an unusable index
        print "$DALI_BIN/import.pl --pdbfile $pdbfile --pdbid $cd --dat ./DAT\n";
}

# dali_id($n) - map a zero-based sequence number to its identifier.
#   $n       integer in 0 .. 7999
#   returns  one letter from a..h followed by three zero-padded digits
#            (0 -> "a000", 999 -> "a999", 1000 -> "b000", 7999 -> "h999")
#   dies     with "Overflow\n" when $n is outside that range
sub dali_id {
        my ($n) = @_;
        my @letters = ('a' .. 'h');
        my $per_letter = 1000;                  # three decimal digits per leading letter
        die "Overflow\n" if $n < 0 || $n >= @letters * $per_letter;
        return sprintf('%s%03d', $letters[ int($n / $per_letter) ], $n % $per_letter);
}


rename_compnd.pl:
=================
# rename_compnd.pl - put the AlphaFold identifier into Dali DAT files.
#
# in:  mapping on STDIN, one "acc dali-id" pair per line
# out: in each file $DALIDATDIR/<dali-id>A.dat, the first "MOLECULE:" on every
#      line is replaced by "AF-<acc>-F1-model_v4:"
#
# Mapping entries whose DAT file cannot be read are skipped with a warning.
# They must not be opened for writing, because that would leave an empty
# .dat file behind in the DAT directory.

use strict;
use warnings;
my $DALIDATDIR="./DAT/";

while (my $line = <STDIN>) {
        my ($acc, $cd) = split ' ', $line;
        next unless defined $cd;                # blank or incomplete mapping line
        # The DAT file name is the Dali id followed by "A" (presumably the chain id).
        tag_compnd("$DALIDATDIR/${cd}A.dat", $acc);
}

# tag_compnd($datfile, $acc) - rewrite one DAT file in place.
#   $datfile  path of the DAT file to modify
#   $acc      accession inserted into the replacement text
#   returns   1 when the file was rewritten, 0 when it could not be read
#             (a warning is printed and the file is left untouched/absent)
#   dies      when the file was read but cannot be written back
sub tag_compnd {
        my ($datfile, $acc) = @_;

        my $in;
        unless (open $in, '<', $datfile) {
                warn "skipping $datfile: $!\n";
                return 0;
        }
        my @lines = <$in>;                      # read everything before the file is truncated below
        close $in;

        open my $out, '>', $datfile or die "cannot write $datfile: $!\n";
        for my $text (@lines) {
                $text =~ s/MOLECULE:/AF-${acc}-F1-model_v4:/;
                print {$out} $text;
        }
        close $out or die "cannot finish writing $datfile: $!\n";
        return 1;
}