#! /usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
use Bio::SearchIO;
use Bio::SeqIO;
###########################################################################################################
# This program detects pili-operons from a list of protein or dna sequences in fasta format.
# Detection is performed in 11 steps:
# 1. If input alphabet is DNA we start by preprocessing DNA seqs and extracting ORFs from DNA contigs.
# 2. The extracted ORFs or protein sequences are preprocessed by deleting nonIUPAC chars from seq endpoints
# and replacing these chars by X within the sequences. Sequence annotations are saved and each seq  is labeled
# by its running number.
# 3. Running hmmsearch with a set of hmm models on the input sequences.
# Searches are performed one HMM at a time parsing the results after each search.
# 4. Selecting the HMM with the highest score for each input sequence. If all HMMs
# return negative scores, null model is selected. Null model is defined to have score 0
# and e-value equal to the total number of sequences.
# 5. Clustering sequences with non-null models into segments according to the "gap" threshold.
# Segments are defined by two numbers: running numbers of the leftmost and the rightmost sequences.
# 6. Calculating scores and p-values for segments. Segment scores are calculated by simply summing scores for
# sequences, p-values are calculated using bootstrap2 unitility (Fisher's exact test + MonteCarlo simulation).
# 7. Thresholding segments using cut-off values on segment score, segment P/P_adj-value and segment length.
# 8. Selecting top level segments.
# 9. Running Phobius on seqs that passed thresholding
# 10. Sorting results by the running number of segments first sequence, segment score or segment P/P_adj-value
# 11. Printing output as tab-delimited text or HTML.
#
# Dependencies:
# Perl modules
# 	Getopt::Long;
# 	Bio::SeqIO
# 	Bio::SearchIO
# files
#	$models*.hmm
#   codon_tables_ncbi.txt
# executables
#   hmmsearch
#	bootstrap2
#	phobius.pl
#	get_orf_from_contig.pl
#
###########################################################################################################


# Configurations:
# path to the hmmsearch executable
my $hmmsearch= "hmmsearch";
my $phobius= "phobius";
my $get_orf= "./get_orf_from_contig_v2.pm";
my $hypgeom= "./bootstrap/bootstrap2"; # path to the c++ hypergeometric P-value estimator
# Paths to temporary files. Note that process executing this program should have write access to these files.
my $temp_fasta= "./temp.faa";
my $buffer1= "./buffer1";
my $buffer2= "./buffer2";
# path to the directory containing hmm models
my $models= "./models/";


# Help message
my $helpm="use: locp.pl [options] filein1 [filein2..]\n".
		  "filein\tinput file in fasta format\n".
		  "options\n".
		  "\t-A value\tsequence alphabet, dna or prot (default)\n".
		  "\t-T value\tminimum open reading frame size (default=50)\n".
		  "\t-S value\tscore cutoff (default 0)\n".
		  "\t-P value\tP-value cutoff (default 1)\n".
		  "\t-P_adj value\tP_adj cufoff (default 0.05)\n".
		  "\t-L value\toperon length cutoff (default 0)\n".
		  "\t-C\t\tanalyse one contigue at a time\n".
		  "\t-V dest\tverbal: prints progress messages to dest (off by default), where dest can be STDOUT or filename\n".
		  "\t-mgap value\tmaximum gap allowed in segments (default 5)\n".
		  "\t-sort key\tsorting results by key: i_begin(default), score, P and P_adj\n".
		  "\t-print mode\tprintout mode: text(default) or html\n".
		  "\t-fileout\tprint to file(s) named by appending \".out\" or \"_out.html\" to input file name(s)\n";
# Default arguments
my %opt= ('A'=>'prot',
		  'T'=>50,
		  'S'=>0,
		  'P'=>1,
		  'P_adj' =>0.05,
		  'L'=>0,
		  'C'=> !1,
		  'V'=> !1,
		  'maxgap' => 5,
		  'sort' => 'i_begin',
		  'print'=> 'text',
		  'fileout'=>!1);
# Reading command line arguments
my $web= !1;
GetOptions('A=s' => \$opt{'A'},
		   'T=i' => \$opt{'T'},
		   'S=f' => \$opt{'S'},
		   'P=f' => \$opt{'P'},
		   'P_adj=f' => \$opt{'P_adj'},
		   'L=i' => \$opt{'L'},
		   'C' => \$opt{'C'},
		   'V=s' => \$opt{'V'},
		   'mgap=i' => \$opt{'maxgap'},
		   'sort=s' => \$opt{'sort'},
		   'print=s' => \$opt{'print'},
		   'fileout' => \$opt{'fileout'},
		   'web' => \$web);

my $IUPAC_prot= 'ARNDCQEGHILKMFPSTWYVX';
my $IUPAC_dna= 'ACGTRYKMSWBDHVN';

# creating handles for log print
my $LOGOUT;
if($opt{'V'}){
	if($opt{'V'} eq 'STDOUT'){
		$LOGOUT= \*STDOUT;
	}
	else{
		open(LOUT,">".$opt{'V'}) or die "Can\'t open file for logging: $opt{'V'}: $!\n";
		$LOGOUT= \*LOUT;
	}
}

# Checking comamnd line arguments
if( !($opt{'sort'} eq 'i_begin' || $opt{'sort'} eq 'score' || $opt{'sort'} eq 'P' || $opt{'sort'} eq 'P_adj') ){
	$opt{'sort'}= 'i_begin';
}
if( !($opt{'A'} eq 'dna' || $opt{'A'} eq 'prot') ){
	$opt{'A'}= 'prot';
}

if ( @ARGV == 0 ) {
	print $helpm;
	exit 1;
}

# when printing to STD we append html opening tags before data tables and closing tags after data tables
if( ($opt{'print'} eq 'html') && !$opt{'fileout'} ){
	print "<html><body>";
}
my $file_count=0;
for my $fin(@ARGV){
	$opt{'fin'}= $fin;
	if($opt{'V'}){print $LOGOUT (++$file_count), " analysing ",$opt{'fin'},"\n";}
	main(\%opt,$web);
}
if( ($opt{'print'} eq 'html') && !$opt{'fileout'} ){
	print "</body></html>";
}



sub main{
	my %opt= %{shift()};
	my $web= shift();
	my $line;
	my $command;
	my $nNkK= !1;

	# HANDLING MAC OS LINE CHANGES
	open(IN, "<$opt{'fin'}") or die "Can\'t open file $opt{'fin'}: $!\n";
	open(OUT, ">$temp_fasta");
	while($line=<IN>){
		$line =~ s/[\r]+/\n/g;
		print OUT $line;
	}
	close(IN);close(OUT);

	# (1) deleteing/replacing non IUPAC chars from DNA seq (ADD ON FOR METAGENOME ANALYSIS)
	if($opt{'A'} eq 'dna'){
		if($opt{'V'}){print $LOGOUT "#   preprocessing dna sequence..";}
		my $in  = Bio::SeqIO->new(-file => "<".$temp_fasta, '-format' => 'Fasta');
		my $out = Bio::SeqIO->new(-file => '>'.$buffer1, '-format' => 'Fasta');
		while( my $seq = $in->next_seq() ){
			# deleting/replacing nonIUPAC
			my $seqstr= $seq->seq();
			$seqstr =~ s/^[^$IUPAC_dna]+//g;
			$seqstr =~ s/[^$IUPAC_dna]+$//g;
			$seqstr =~ s/[^$IUPAC_dna]/N/g;
			my $newseq= Bio::Seq->new( -display_id => $seq->display_id(),
								-description => $seq->description(),
								-seq => $seqstr);
			$out->write_seq($newseq);
		}
		if($opt{'V'}){print $LOGOUT "done\n";}
		$command= "cp -f $buffer1 $temp_fasta";
		system($command)==0
			or die "exception while running $command: EXITING\n";
	}


	# (1) Converting DNA to ORFs (ADD ON FOR METAGENOME ANALYSIS)
	if($opt{'A'} eq 'dna'){
		if($opt{'V'}){print $LOGOUT "#   converting DNA to open reading frames..";}
		$command= "$get_orf -L $opt{'T'} $temp_fasta > $buffer1";
		system($command) ==0
			or die "exception while running $command: EXITING\n";
		if($opt{'V'}){print $LOGOUT "done\n";}
		$command= "cp -f $buffer1 $temp_fasta";
		system($command)==0
			or die "exception while running $command: EXITING\n";
	}


	# (2) Saving annotation, adding running number and deleting/replacing non IUPAC chars
	my @seqids;
	my @seqannotations;
	if($opt{'V'}){print $LOGOUT "#   preprocessing protein sequence..";}
	my $in  = Bio::SeqIO->new(-file => '<'.$temp_fasta, '-format' => 'Fasta');
	my $out = Bio::SeqIO->new(-file => '>'.$buffer1, '-format' => 'Fasta');
	my $seqcount=0;
	while( my $seq = $in->next_seq() ){
		# saving annotation
		push(@seqids,$seq->display_id());
		push(@seqannotations, $seq->description());

		# deleting/replacing nonIUPAC: would crash phobius
		my $seqstr= $seq->seq();
		$seqstr =~ s/^[^$IUPAC_prot]+//g;
		$seqstr =~ s/[^$IUPAC_prot]+$//g;
		$seqstr =~ s/[^$IUPAC_prot]/X/g;
		my $newseq= Bio::Seq->new( -display_id => $seq->display_id(),
								-description => $seq->description(),
								-seq => $seqstr);

		# adding running number
		$newseq->display_id($seqcount++."|".$seq->display_id());
		$out->write_seq($newseq);
	}
	$command= "cp -f $buffer1 $temp_fasta";
	system($command)==0
		or die "exception while running $command: EXITING\n";
	if($opt{'V'}){print $LOGOUT "done\n";}


	# (3) Running and parsing HMM models
	if($opt{'V'}){print $LOGOUT "#   running HMMs:\n";}
	my @models= <$models*.hmm>;
	my $modelcount= $#models+2; # HMM models plus null-model
	my $options= "-Z $seqcount -E $seqcount";
	my $temphmm= "temp.hmm";
	my $hit;
	my $evalue;
	my $score;
	my $search;
	my $result;
	my @model_names;
	my @model_an;

	# allocating matricies for parsed hit-objects, e-values and score-values
	my @hit_mat;
	if($opt{'print'} eq 'html'){

		$#hit_mat= $seqcount-1;
	}
	my @evalue_mat;
	$#evalue_mat= $seqcount-1;
	my @score_mat;
	$#score_mat= $seqcount-1;
	for(my $seqi=0; $seqi<$seqcount; $seqi++){
		if($opt{'print'} eq 'html'){
			my @a;
			$#a= $modelcount-1;
			$hit_mat[$seqi]= \@a;
		}

		my @b;
		$#b= $modelcount-1;
		my @c;
		$#c= $modelcount-1;
		for(my $i=0; $i<=($modelcount-1); $i++){
			$b[$i]= 1;
			$c[$i]= 0;
		}

		$evalue_mat[$seqi]= \@b;
		$score_mat[$seqi]= \@c;
	}

	# null model scores and e-values, these are set to 0 and the total number of sequences respectively
	push(@model_names,"null");
	push(@model_an,"[none]");
	for(my $seqi=0; $seqi<$seqcount; $seqi++){
		#$hit_mat[$seqi]->[0]= 0;
		$score_mat[$seqi]->[0]= 0;
		$evalue_mat[$seqi]->[0]= $seqcount;
	}

	# running hmmsearch and parsing: parsed HMM model hit-objects, scores and e-values
	for(my $mi=0; $mi<=$#models; $mi++){
		if($opt{'V'}){print $LOGOUT "#   running $models[$mi]..";}
		system("$hmmsearch $options $models[$mi] $temp_fasta > $buffer1") ==0
			or die "exception while running hmmsearch: EXITING\n";
		$search = new Bio::SearchIO(-format => 'hmmer',
									-file   => "$buffer1");
		$result = $search->next_result();

		push(@model_names,$result->query_name());
		push(@model_an,$result->query_accession());

		while($hit = $result->next_hit()){
			my $id= $hit->name();
			my $seqi= (split(/[|]/,$id))[0];
			if($opt{'print'} eq 'html'){
				$hit_mat[$seqi]->[$mi+1]= $hit;
			}
			$score_mat[$seqi]->[$mi+1]= $hit->score();
			$evalue_mat[$seqi]->[$mi+1]= $hit->significance();
		}
		if($opt{'V'}){print $LOGOUT "done\n";}
	}


	# (4) Sorting hmm:s by bitscore and selecting best hmm for each sequence
	my @mis;		# indices of best models
	my @hmmhits; 	# binary vector marking seq with score above 0 (modeled with HMM)
	my @scores;		# scores for the best models
	my @evalues;	# e-values for the best models
	my @pvalues;	# p-values for the best models
	for(my $seqi=0; $seqi<$seqcount; $seqi++){
		my $mi= maxi($score_mat[$seqi]);
		push(@mis,$mi);
		push(@hmmhits,($mi==0)? !1 : 1);
		push(@scores, $score_mat[$seqi]->[$mi]);
		push(@evalues, $evalue_mat[$seqi]->[$mi]);
		push(@pvalues, ($evalue_mat[$seqi]->[$mi])/$seqcount);
	}



	# (5) Scanning for HMM clusters (==segments) with various maximum gap sizes
	if($opt{'V'}){print $LOGOUT "#   scanning for HMM clusters..";}
	my @segms;

	if($opt{'C'}){
		my @contigs= getContigs(\@seqids);
		my @hmmhits_c; # binary array of hits for one contig
		my @segms_cand;
		for my $contig(@contigs){

			@hmmhits_c= subarray(\@hmmhits,$contig->[0],$contig->[1]);
			my %segms_found;

			for(my $gap=0; $gap<=$opt{'maxgap'}; $gap++){
				my @segms_cand= get_segments(\@hmmhits_c,$gap);
				for(my $i=0; $i<=$#segms_cand; $i++){
					my $i_begin=	($segms_cand[$i]->[0])+($contig->[0]);
					my $i_end= 		($segms_cand[$i]->[1])+($contig->[0]);

					if( !defined($segms_found{$i_begin}) ){
						my %segm= ('i_begin'=>$i_begin, 'i_end'=> $i_end);
						push(@segms,\%segm);

						my %temp= ($i_end => 1);
						$segms_found{$i_begin}= \%temp;
					}

					elsif( !defined($segms_found{$i_begin}->{$i_end}) ){
						my %segm= ('i_begin'=>$i_begin, 'i_end'=> $i_end);
						push(@segms,\%segm);

						$segms_found{$i_begin}->{$i_end}= 1;
					}
				}
			}
		}
	}
	else{
		my %segms_found;
		my @segms_cand;

		for(my $gap=0; $gap<=$opt{'maxgap'}; $gap++){
			my @segms_cand= get_segments(\@hmmhits,$gap);
			for(my $i=0; $i<=$#segms_cand; $i++){
				my $i_begin= $segms_cand[$i]->[0];
				my $i_end= $segms_cand[$i]->[1];

				if( !defined($segms_found{$i_begin}) ){
					my %segm= ('i_begin'=>$i_begin, 'i_end'=> $i_end);
					push(@segms,\%segm);

					my %temp= ($i_end => 1);
					$segms_found{$i_begin}= \%temp;
				}

				elsif( !defined($segms_found{$i_begin}->{$i_end}) ){
					my %segm= ('i_begin'=>$i_begin, 'i_end'=> $i_end);
					push(@segms,\%segm);

					$segms_found{$i_begin}->{$i_end}= 1;
				}
			}
		}
	}
	if($opt{'V'}){print $LOGOUT "done\n";}


	# (6) Segment scores and p-values
	if($opt{'V'}){print $LOGOUT "#   calculating segment scores..";}
	for my $segm(@segms){
		my @temp= subarray(\@scores,$segm->{'i_begin'},$segm->{'i_end'});
		my $score= get_sum( \@temp );
		$segm->{'score'}= $score;

		#@temp= subarray(\@pvalues,$segm->{'i_begin'},$segm->{'i_end'});
		#my $pvalue= fishers_method(\@temp);
		#$segm->{'P'}=$pvalue;
	}
	if($opt{'V'}){print $LOGOUT "done\n";}


	# Calling the c++ hypergeometric P-value estimator
	if($opt{'V'}){print $LOGOUT "#   estimating segment P-values..";}

	my $N= $#seqids+1;
	my $K= count(\@hmmhits);

	open(OUT,">".$buffer1) or die "Can\'t open file $buffer1: $!\n";
	print OUT "N\t$N\n";
	print OUT "K\t$K\n";
	print OUT "maxgap\t$opt{'maxgap'}\n";
	print OUT "\n";
	print OUT "n\tk\n";
	for my $segm(@segms){
		my $n= $segm->{'i_end'} - $segm->{'i_begin'}+1;
			my @temp= subarray(\@hmmhits, $segm->{'i_begin'},$segm->{'i_end'});
		my $k= count(\@temp);
		print OUT "$n\t$k\n";
	}
	print OUT "\n";
	close(OUT);
	my $call= "$hypgeom $buffer1 $buffer2";
	system($call) ==0 or die "exeption while executing system call: $call\n";

	# Reading in results
	open(IN,"<".$buffer2) or die "Can\'t open file $buffer2: $!\n";
	while($line=<IN>){
		if($line eq "\n"){
			last;

		}
	}

	$line= <IN>;
	chomp($line);
	my @splitted= split(/[\t]+/,$line);
	my %index_of;
	for(my $i=0; $i<=$#splitted; $i++){
		if($splitted[$i] eq 'P'){
			$index_of{'P'}= $i;
		}elsif($splitted[$i] eq 'P_adj'){
			$index_of{'P_adj'}= $i;
		}
	}

	for my $segm(@segms){
		$line=<IN>;
		chomp($line);
		@splitted= split(/[\t]+/,$line);
		$segm->{'P'}= $splitted[$index_of{'P'}];
		$segm->{'P_adj'}= $splitted[$index_of{'P_adj'}];
	}
	if($opt{'V'}){print $LOGOUT "done\n";}


	# (7) Thresholding segments
	@segms= thresholdPLDRs(\@segms,\%opt);

	# (8) Filtering top level segments
	@segms= filtertopwin(\@segms);


	# (9) Running Phobius on seqs that passed threshold
	my %phobius_results;
	my %seqi_passed;
	my $seqid;
	my $seqi;
	my $seq_count=0;
	for my $segm(@segms){
		for($seqi=$segm->{'i_begin'}; $seqi<=$segm->{'i_end'}; $seqi++){
			$seqi_passed{$seqi}= 1;
			$seq_count++;
		}
	}
	if($seq_count>0){
		my $fin  = Bio::SeqIO->new(-file => '<'.$temp_fasta , '-format' => 'Fasta');
		my $fout = Bio::SeqIO->new(-file => '>'.$buffer1, '-format' => 'Fasta');

		while( my $seq = $fin->next_seq() ){
			$seqid= $seq->display_id();
			$seqi= (split(/[|]/,$seqid))[0];
			if( defined($seqi_passed{$seqi}) ){
				$fout->write_seq($seq);
			}
		}

		if($opt{'V'}){print $LOGOUT "#   running phobius..";}
		system("$phobius $buffer1 >& $buffer2") ==0
				or die "exception while running phobius: EXITING\n";
		%phobius_results= parsePhobius2PointerTable($buffer2);
		if($opt{'V'}){print $LOGOUT "done\n";}
	}


	# (10) Sorting
	if($opt{'V'}){print $LOGOUT "#   sorting..";}
	my $k= $opt{'sort'};
	if($k eq 'score'){
		@segms= sort {$b->{$k} <=> $a->{$k}} @segms; # sorting numerically descending
	}
	else{ #i_begin, P and P_adj
		@segms= sort {$a->{$k} <=> $b->{$k}} @segms; # sorting numerically ascending
	}
	if($opt{'V'}){print $LOGOUT "done\n";}


	# (11) Printing
	my $h=\*STDOUT;
	if($opt{'fileout'}){
		my $fout= $opt{'fin'}. (($opt{'print'} eq 'html')? '_out.html':'_out');
		open(OUT,">".$fout) or die "Can\'t open file $fout: $!\n";
		$h= \*OUT;
	}

	if($opt{'print'} eq 'html'){
		if($opt{'fileout'}){
			print $h "<html><body>";
		}
		htmlprint($h,
				  \%opt,
				  \@seqids,
				  \@seqannotations,
				  \@model_names,
				  \@model_an,
				  \@hmmhits,
				  \@score_mat,
				  \@evalue_mat,
				  \@hit_mat,
				  \@segms,
				  \%phobius_results);
		if($opt{'fileout'}){
			print $h "</body></html>";
		}
	}
	else{
		if($web){
			print $h "<pre>\n";
		}
		textprint($h,
				  \%opt,
				  \@seqids,
				  \@seqannotations,
				  \@model_names,
				  \@model_an,
				  \@hmmhits,
				  \@score_mat,
				  \@evalue_mat,
				  \@segms,
				  \%phobius_results);
		if($web){
			print $h "</pre>\n";
		}
	}
}

sub textprint{
	my $h= shift(@_);
	my %opt= %{shift(@_)};
	my @seqids= @{shift(@_)};
	my @seqannotations= @{shift(@_)};
	my @model_names= @{shift(@_)};
	my @model_an= @{shift(@_)};
	my @hmmhits= @{shift(@_)};
	my @score_mat= @{shift(@_)};
	my @evalue_mat= @{shift(@_)};
	my @segms= @{shift(@_)};
	my %phobius_results= %{shift(@_)};
	my @mis;
	my $mn= $#model_names + 1;
	for(my $mi=0; $mi< $mn; $mi++){
		push(@mis,$mi);
	}

	my $N= $#seqids+1;
	my $K= count(\@hmmhits);

	print $h getProgDesc(\%opt);
	print $h "#N= $N\n";
	print $h "#K= $K\n";
	print $h "\n";

	printf $h "OP:\tstart_i\tend_i\tscore\tP\tP_adj\tpilus\n";
	printf $h "SEQ:\ti\tid\tHMMnames\tHMMans\tHMMscores\tHHMevalues\tsignalpep\tC-anchor\n\n";

	if($#segms<0){
		print $h "# NO PILI WERE DETECTED\n";
	}

	my $seqi=0; # sequence index
	for my $segm(@segms){

		# segment info
		print $h "OP\t",
			$segm->{'i_begin'},"\t",
			$segm->{'i_end'},"\t",
			$segm->{'score'},"\t",
			$segm->{'P'},"\t",
			$segm->{'P_adj'},"\t",
			(($segm->{'pili'})? 'Y':'N'),"\n";

		#sequence info
		for($seqi=$segm->{'i_begin'}; $seqi<=$segm->{'i_end'}; $seqi++){
			my @scores= @{$score_mat[$seqi]};
			my @evalues= @{$evalue_mat[$seqi]};
			my @mis_sorted= sort {$scores[$b] <=> $scores[$a]} @mis;
			my @phobius_table= @{$phobius_results{$seqi}};

			print $h "SEQ\t";			# seq flag
			print $h $seqi,"\t",		# seq ind
				   $seqids[$seqi];			# seq id

			my $i2= 0;
			for (my $i=1; $i<=$#mis_sorted; $i++){
				if($scores[$mis_sorted[$i]]<=0){
					$i2= $i-1;
					last;
				}
			}
			print $h "\t",$model_names[$mis_sorted[0]];		# seq HMMnames
			for (my $i=1; $i<=$i2; $i++){
				print $h ",",$model_names[$mis_sorted[$i]];
			}

			print $h "\t",$model_an[$mis_sorted[0]];		# seq HMMans
			for (my $i=1; $i<=$i2; $i++){
				print $h ",",$model_an[$mis_sorted[$i]];
			}

			print $h "\t",$scores[$mis_sorted[0]];			# seq scores
			for (my $i=1; $i<=$i2; $i++){
				print $h ",",$scores[$mis_sorted[$i]];
			}

			print $h "\t",$evalues[$mis_sorted[0]]; 		# seq HMMevalues
			for (my $i=1; $i<=$i2; $i++){
				print $h ",",$evalues[$mis_sorted[$i]];
			}

			print $h "\t",					# seq signalpep
				(($phobius_table[0]->[1] eq 'SIGNAL')?'Y':'N');

			print $h "\t",					# seq C-anchor
				(($phobius_table[$#phobius_table-1]->[1] eq 'TRANSMEM') && ($phobius_table[$#phobius_table]->[4] eq 'CYTOPLASMIC'))?'Y':'N';

			print $h "\n";
		}
		print $h "\n"; # separates operon entries for readability
	}
}


sub htmlprint{
	my $h= shift(@_);
	my %opt= %{shift(@_)};
	my @seqids= @{shift(@_)};
	my @seqannotations= @{shift(@_)};
	my @model_names= @{shift(@_)};
	my @model_an= @{shift(@_)};
	my @hmmhits= @{shift(@_)};
	my @score_mat= @{shift(@_)};
	my @evalue_mat= @{shift(@_)};
	my @hit_mat= @{shift(@_)};
	my @segms= @{shift(@_)};
	my %phobius_results= %{shift(@_)};
	my @mis;
	my $mn= $#model_names + 1;
	for(my $mi=0; $mi< $mn; $mi++){
		push(@mis,$mi);
	}

	# JavaScipt function and css classes for opening/closing additional sequence information
	print $h "<script type=\"text/javascript\" language=\"JavaScript\">\n",
			 "<!--\n",
			 "function openClose(id){\n",
			 "   var el = document.getElementById(id);\n",
			 "   if(el.className == \"openClass\"){\n",
             "      el.className = \"closedClass\";\n",
        	 "   }\n",
        	 "   else{\n",
           	 "      el.className = \"openClass\";\n",
        	 "   }\n",
			 "}\n",
			 "//-->\n",
			 "</script>\n",

			 "<style type=\"text/css\">\n",
			 "	body { font-family:monospace; }\n",
			 "	p {text-align:center;}\n",
			 "	p.h1 {font-size:16px;\n",
			 "			text-decoration:underline;}\n",
			 "	p.h2 {font-size:14px;\n",
   		     "			text-decoration:underline;}\n",
			 "   .openClass { display: table-row; }\n",
			 "   .closedClass { display: none; }\n",
			 "</style>\n";

	my $col=8;
	my $N= $#seqids+1;
	my $K= count(\@hmmhits);
	print $h "<TABLE border=\"1\" align=\"center\" cellpadding=\"5\" cellspacing=\"0\">\n",
			 "<thead>\n",
			 "<tr><td colspan=\"$col\">\n",
			 "<a href=\"javascript:openClose(\'parameters\');\">LOCP parameters</a>",
			 "</td></tr>\n",
			 "<tr class=\"closedClass\" id=\"parameters\"><td colspan=\"$col\">",
			 getProgDesc(\%opt),"\n",
			 "<br>#N= $N\n",
			 "<br>#K= $K\n",
			 "</td></tr>\n",
			 "<tr>\n",
			 	"<th>Ind</td>\n",
			 	"<th>SeqID</th>\n",
			 	"<th>TopHMMName</th>\n",
			 	"<th>SignalpPep</th>\n",
			 	"<th>C-anchor</th>\n",
			 	"<th>Score</th>\n",
			 	"<th>P</th>\n",
			 	"<th>P_adj</th>\n",
			 "</tr>\n",
			 "</thead>\n",
			 "<tfoot>\n",
			 "<tr>\n",
			 	"<th>Ind</td>\n",
			 	"<th>SeqID</th>\n",
			 	"<th>TopHMMName</th>\n",
			 	"<th>SignalpPep</th>\n",
			 	"<th>C-anchor</th>\n",
			 	"<th>Score</th>\n",
			 	"<th>P</th>\n",
			 	"<th>P_adj</th>\n",
			 "</tr>\n",
			 "</tfoot>\n";

	print $h "<tbody>\n";
	my $seqi=0; # sequence index
	my $hsp;	# high scoring pair object
	my $first_segm= 1;

	if($#segms<0){
		print $h "<tr><td colspan=\"$col\"><br>NO PILI WERE DETECTED<br><br></td></tr>\n";
	}
	for
	my $segm(@segms){

		if($first_segm){
			$first_segm= !1;
		}
		else{ # empty line between consecutive operons
			print $h "<tr class=\"openClass\"><td colspan=\"$col\"> </td></tr>\n";
		}

		for($seqi=$segm->{'i_begin'}; $seqi<=$segm->{'i_end'}; $seqi++){
			my @scores= @{$score_mat[$seqi]};
			my @evalues= @{$evalue_mat[$seqi]};
			my @hits= @{$hit_mat[$seqi]};
			my @mis_sorted= sort {$scores[$b] <=> $scores[$a]} @mis;
			my @phobius_table= @{$phobius_results{$seqi}};

			# basic info, visible by default
			print $h "<tr class=\"openClass\">\n",
					 "   <td>",$seqi,"</td>\n",
					 "   <td><a href=\"javascript:openClose(\'seq",$seqi,"\');\">\n",
					 	 substr($seqids[$seqi],0,20),"</a></td>\n",
					 "   <td>",$model_names[$mis_sorted[0]],"</td>\n",
					 "	 <td>",(($phobius_table[0]->[1] eq 'SIGNAL')?'Y':'N'),"</td>\n",
					 "	 <td>",(($phobius_table[$#phobius_table-1]->[1] eq 'TRANSMEM') && ($phobius_table[$#phobius_table]->[4] eq 'CYTOPLASMIC'))?'Y':'N',"</td>\n",
					 "   <td>",$segm->{'score'},"</td>\n",
					 "   <td>",$segm->{'P'},"</td>\n",
					 "   <td>",($segm->{'P_adj'}>0)? $segm->{'P_adj'}: "0/1000","</td>\n",
					 "</tr>\n";

			# additional info, by default hidden
			print $h "<tr class=\"closedClass\" id=\"seq",$seqi,"\">\n",
					 "	<td colspan=\"$col\">\n",

					 "  <p class=\"h1\">Sequence</p>\n",
					 "	id: $seqids[$seqi]\n",
					 "	<br>annotation: $seqannotations[$seqi]\n",

					 "	<p class=\"h1\">HMM hits</p>\n",

					 "<table align=\"center\" border=\"1\" cellpadding=\"5\" cellspacing=\"0\">\n",
					 "	<tr><th>name</th>\n",
					 "		<th>assession number</th>\n",
					 "		<th>score</th>\n",
					 "		<th>e-value</th>\n",
					 "	</tr>\n",
					 "	<tr><th colspan=\"4\">Alignment</th>\n",
					 "	</tr>\n";

			for my $mi(@mis_sorted){
				if($scores[$mi]< 0 || $mi==0){
					last;
				}
				print $h "	<tr align=\"center\"><td><a href=\"javascript:openClose(\'seq",$seqi,"_hmm",$mi,"\');\">",$model_names[$mi],"</a></td>\n",
						 "		<td>$model_an[$mi]</td>\n",
						 "		<td>$scores[$mi]</td>\n",
						 "		<td>$evalues[$mi]</td>\n",
						 "	</tr>\n";

				# printing seq to HMM alignment one hsp at a time
				print $h "	<tr id=\"seq",$seqi,"_hmm",$mi,"\" class=\"closedClass\">\n",
						 "		<td colspan=\"4\">\n";
				my $domaini=1;
				while( $hsp = $hits[$mi]->next_hsp()){
				print $h "		<p class=\"h2\">domain $domaini of ", $hits[$mi]->num_hsps(),"</p>\n",
						 "<pre>",
						 "Seq:",$hsp->hit_string,"\n",
						 "Hom:",$hsp->homology_string,"\n",
						 "HMM:",$hsp->query_string,
						 "</pre>\n";
				}
				print $h "		</td>\n",
						 "	</tr>\n";
			}
			print $h "</table>\n";

			print $h "<p class=\"h1\">Phobius</p>\n";
			print $h "<table align=\"center\">\n";
			for(my $i=0; $i<=$#phobius_table; $i++){
				my @row= @{$phobius_table[$i]};
				print $h "	<tr>\n";
				for(my $j=0; $j<=$#row; $j++){
					print $h "<td>$row[$j]</td>";
				}
				print $h "	</tr>\n";
			}
			print $h "</table>\n";


			print $h "</td>\n",
					 "</tr>\n";
		}
	}
	print $h "</tbody>\n",
			 "</TABLE>\n";
}

# Returns description of this program
sub getProgDesc{
	my %opt= %{shift(@_)};
	my $html= $opt{'print'} eq 'html';
	my $desc=
	"###################################################\n".
	(($html)? "<br>" : "" ) .
	"# locp.pl\n";
	for my $k(keys %opt){
		if($html){
			$desc.="<br>";
		}
		if($k eq 'V' || $k eq 'fileout' || $k eq 'C'){
			$desc.= "#\t-$k:\t";
			$desc.= (($opt{$k})? 'yes':'no');
			$desc.= "\n";
		}
		else{
			$desc.= "#\t-$k:\t$opt{$k}\n";
		}
	}
	$desc.=
	(($html)? "<br>" : "" ) .
	"###################################################\n";
	return $desc;
}

# Returns Sum
sub get_sum{
	my @v= @{$_[0]};
	my $s= 0;
	for my $e(@v){
		$s+=$e;
	}
	return $s;
}

# Returns a subarray i1 to i2.
sub subarray{
	my @a= @{$_[0]};
	my $i1= $_[1];
	my $i2= $_[2];
	my @sa;
	for(;$i1<=$i2; $i1++){
		push(@sa, $a[$i1]);
	}
	return @sa;
}

# Returns number of true values in binary table
sub count{
	my @a= @{$_[0]};
	my $n=0;
	for my $e(@a){
		if($e){
			$n++;
		}
	}
	return $n;
}

# Returns maximum index
sub maxi{
	my @v= @{$_[0]};
	my $k=0;
	for(my $i=0; $i<=$#v; $i++){
		if($v[$i]>$v[$k]){
			$k= $i;
		}
	}
	return $k;
}

# Scans binary array for clusters of consecutive true-values and returns an array structure
# specifying the segments found.
# parameters:
#	1. reference to binary array to be scanned
#	2. maximum gap length
# returns:
#	N x 2 matrix C specified as an array of array pointers, where each subarray @{$C[i]} is
#	a pair of numbers defining the starting, $C[i]->[0], and ending, $C[i]->[1], indices of
#	segment i.
sub get_segments{
	my @b= @{$_[0]};
	my $maxgap= $_[1];

	my @C;
	my $i1=0;
	my $i2=0;
	my $i3=0;
	loop1:while($i1<=$#b){
		#print "$i1\n";
		if(!$b[$i1]){
			$i1++;
		}
		else{
			$i2=$i1;
			loop2:while($i2<=$#b){
				#print "   $i2\n";
				if($b[$i2]){
					if($i2==$#b){
						my @cluster= ($i1,$i2);
						push(@C,\@cluster);
						last loop1; # iterated till the end of array
					}
					else{
						$i2++;
					}
				}
				else{
					$i3=$i2;
					loop3:while($i3<=$#b){
						#print "      $i3\n";
						if(($i3-$i2)>$maxgap){
							my @cluster= ($i1,$i2-1);
							push(@C,\@cluster);
							$i1=$i3;
							last loop2; # back to loop1
						}
						elsif($b[$i3]){
							$i2=$i3;
							last loop3; # back to loop2
						}
						elsif($i3==$#b){
							my @cluster= ($i1,$i2-1);
							push(@C,\@cluster);
							last loop1; # iterated till the end of array
						}
						else{
							$i3++;
						}
					} #close loop3
				}
			} #close loop2
		}
	} #close loop1
	return @C;
}


#
# Returns array specifying first and last indices of all contigs.
# Contigs are identified by the first "|"-delimited value in the id field.
#
sub getContigs{
	my @seqids= @{$_[0]};

	my @contigs;
	my $contigid_b;
	my $contigid;
	my $i_b;
	my $i;

	$i_b= 0;
	$contigid_b= (split(/[|]/,$seqids[$i_b]))[1];
	for($i=$i_b+1; $i<=$#seqids; $i++){
		$contigid= (split(/[|]/,$seqids[$i]))[1];
		if( !($contigid_b eq $contigid) ){
			#print $contigid_b,"!=",$contigid,"\n";
			my @contig= ($i_b,($i-1));
			push(@contigs,\@contig);
			$i_b= $i;
			$contigid_b= $contigid;
		}
	}
	# last segment
	my @contig2= ($i_b,($i-1));
	push(@contigs,\@contig2);

	return @contigs;
}


#
# This function parses phobius output to a hash of tables, where each table
# is implemented as an array of array pointers.
# This function assumes that each ID field in phobius output begins with
# a unique number followed by | character. This number is used as the key
# for the corresponding table in the hash returned.
#
sub parsePhobius2PointerTable{
	my $fin= shift;
	open(IN,$fin) or die "Can\'t open file $fin: $!\n";
	my $line;
	<IN>;<IN>;<IN>; # Reference lines

	my %table;
	my @record;
	my $inrecord= !1;
	my $id;
	my @splitted;

	while($line=<IN>){
		chomp($line);
		$line =~ s/[.]$//;
		if($line =~ /^['ID']/){ # entering record
			@splitted= split(/[' ']+/,$line);
			@splitted= split(/[|]/,$splitted[1]);
			$id= $splitted[0];
			@record=();
			$inrecord= 1;
		}
		elsif($line eq '//'){
			if($inrecord){ # exiting record
				my @copy= @record;
				$table{$id}= \@copy;
				$inrecord= !1;
			}
		}
		else{
			my @row= split(/[' ']{2,}/,$line);
			push(@record, \@row);
		}
	}

	return %table;
}

#
# Filters out top level windows, these are windows that are not included in any other window.
# Takes as argument table of windows. Returns table of top level windows.
#
sub filtertopwin{
	my @table= @{shift(@_)};
	my @table_top;

	row:for my $win1(@table){
		col:for my $win2(@table){
			if($win1 == $win2){
				next col;
			}
			if(  ($win2->{'i_begin'}) <= ($win1->{'i_begin'}) && ($win1->{'i_end'}) <= ($win2->{'i_end'}) ){  # window 1 is included in the window 2
				next row;
			}
		}
		# window is not included in any other window
		push(@table_top,$win1);
	}
	return @table_top;
}

#
# Returns a subset of PLDRs that pass thresholds
sub thresholdPLDRs{
	my @pldrs= @{shift(@_)};
	my %opt= %{shift(@_)};

	my @pldrs_th;
	for my $pldr(@pldrs){
		if( ($pldr->{'i_end'} - $pldr->{'i_begin'} + 1) >= $opt{'L'}
			&& $pldr->{'score'} >= $opt{'S'}
			&& $pldr->{'P'} <= $opt{'P'}
			&& $pldr->{'P_adj'} <= $opt{'P_adj'} ){
				push(@pldrs_th,$pldr);
		}
	}

	return @pldrs_th;
}
