#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdarg.h>
#include <ctype.h>
#include <math.h>
#include "leon.h"

/* Smaller / larger of two values.  These are function-like macros: the
 * selected argument is evaluated twice, so do not pass expressions with
 * side effects (e.g. i++ or a function call that changes state). */
#define MIN(a,b) ((a)<(b)?(a):(b))
#define MAX(a,b) ((a)>(b)?(a):(b))

float normalise_score(float score,float n,float ntot,float ntotseq)
{
        float ret;

        if(n==0) ret=0.0;
        else
                ret=score*exp(-10.0*(float)(ntot-n)/((float)(ntot)));

        return ret;

}

/*
*	sort_scores()
*
*	Recursively sorts scores[f..l] (both ends inclusive) into descending
*	order, quicksort fashion, using the middle element as the pivot.
*/
void sort_scores(float *scores,int f,int l)
{
	int scan;
	int boundary;	/* last slot holding a value greater than the pivot */

	if (l <= f)
		return;

	/* park the pivot (middle element) at the front of the range */
	swap_scores(scores,f,(f+l)/2);

	boundary = f;
	scan = f;
	while (++scan <= l) {
		if (scores[f] < scores[scan]) {
			boundary++;
			swap_scores(scores,boundary,scan);
		}
	}

	/* drop the pivot into its final slot, then sort both sides of it */
	swap_scores(scores,f,boundary);
	sort_scores(scores,f,boundary-1);
	sort_scores(scores,boundary+1,l);
}

/*
*	swap_scores()
*
*	Exchanges the values stored at scores[s1] and scores[s2].
*/
void swap_scores(float *scores,int s1, int s2)
{
	float *first = scores + s1;
	float *second = scores + s2;
	const float held = *first;

	*first = *second;
	*second = held;
}

/*
*	ckalloc()
*
*	Allocates "bytes" bytes of zero-filled memory with calloc().
*	If the allocation fails, fatal() is called with an out-of-memory
*	message (fatal() terminates the program).
*	Return value:
*		Generic pointer to the newly allocated memory.
*/

void *ckalloc(size_t bytes)
{
	void *block = calloc(bytes, sizeof(char));

	if (block == NULL)
		fatal("Out of memory\n");

	return block;
}

/*
*	ckrealloc()
*
*	Resizes the block at "ptr" to "bytes" bytes with realloc().
*	A NULL "ptr" is treated as a programming error, and a failed
*	realloc() as out-of-memory; both are reported through fatal(),
*	which terminates the program.
*	Return value:
*		Generic pointer to the re-allocated memory.
*/

void *ckrealloc(void *ptr, size_t bytes)
{
	void *resized = NULL;

	if (ptr == NULL) {
		fatal("Bad call to ckrealloc\n");
		return resized;
	}

	resized = realloc(ptr, bytes);
	if (resized == NULL)
		fatal("Out of memory\n");

	return resized;
}

/*
*	ckfree()
*
*	Releases memory obtained from ckalloc()/ckrealloc().  A NULL
*	argument is reported with warning() and otherwise ignored.
*	The caller's own pointer variable is not modified.
*	Return value:
*		Always NULL.
*/

void *ckfree(void *ptr)
{
	if (ptr != NULL)
		free(ptr);
	else
		warning("Bad call to ckfree\n");

	return NULL;
}


/*
*	rtrim()
*
*	Removes trailing white space (as classified by isspace()) from a
*	string, in place.  An empty or all-white-space string becomes "".
*
*	Return values:
*		Pointer to the processed string
*/

char * rtrim(char *str)
{
	size_t keep = strlen(str);

	/* Stop at the start of the string so str[-1] is never read; the
	 * unsigned char cast keeps isspace() defined for bytes >= 0x80. */
	while ( keep > 0 && isspace((unsigned char)str[keep - 1]) )
		keep--;

	str[keep] = '\0';

	return str;
}


/*
*	blank_to_()
*
*	Replaces, in place, every space in a string with an underscore.
*	The characters , ; : ( and ) are replaced with an underscore too.
*
*	Return value:
*		Pointer to the processed string
*/

char * blank_to_(char *str)
{
	char *cur;

	for (cur = str; *cur != '\0'; cur++) {
		switch (*cur) {
		case ' ':
		case ';':
		case ',':
		case '(':
		case ')':
		case ':':
			*cur = '_';
			break;
		default:
			break;
		}
	}

	return str;
}


/*
*	upstr()
*
*	Converts string str to uppercase, in place.
*	Return values:
*		Pointer to the converted string.
*/

char * upstr(char *str)
{
	char *s;

	/* toupper() is only defined for EOF and unsigned char values, so the
	 * byte is cast before the call (plain char may be negative). */
	for (s = str; *s != '\0'; s++)
		*s = (char)toupper((unsigned char)*s);

	return str;
}

/*
*	lowstr()
*
*	Converts string str to lower case, in place.
*	Return values:
*		Pointer to the converted string.
*/

char * lowstr(char *str)
{
	char *s;

	/* tolower() leaves non-uppercase characters unchanged, so no isupper()
	 * test is needed.  The unsigned char cast keeps the call defined for
	 * bytes >= 0x80 (plain char may be negative). */
	for (s = str; *s != '\0'; s++)
		*s = (char)tolower((unsigned char)*s);

	return str;
}

void getstr(char *instr,char *outstr)
{	
	fprintf(stdout,"%s: ",instr);
	gets(outstr);
}

/*
*	getreal()
*
*	Prompts on stdout with "instr", the allowed range and the default,
*	then reads a floating point number from stdin.  The prompt is repeated
*	until the value lies within [minx, maxx].
*
*	Return value:
*		The number entered, or "def" when the line is blank or when
*		stdin is at end of input / fails.
*
*	Input is read with fgets() (bounded by the size of the line buffer)
*	instead of gets().  A line that does not start with a number is
*	rejected and the prompt repeated, rather than comparing an
*	uninitialised value against the limits.
*/
double getreal(char *instr,double minx,double maxx,double def)
{
	int status;
	float ret;
	char line[MAXLINE];	
	
	while(TRUE) {
		fprintf(stdout,"%s (%.1f-%.1f)   [%.1f]: ",instr,minx,maxx,def);
		if(fgets(line,sizeof line,stdin) == NULL)
			return def;	/* end of input or read error */
		if(strchr(line,'\n') == NULL) {
			/* over-long line: discard the tail so it is not read as the next answer */
			int c;
			while((c=getchar()) != '\n' && c != EOF)
				;
		}
		status=sscanf(line,"%f",&ret);
		if(status == EOF) return def;	/* blank line: take the default */
		if(status != 1) {
			fprintf(stdout,"ERROR: Invalid number\n\n");
			continue;
		}
		if(ret>maxx) {
			fprintf(stdout,"ERROR: Max. value=%.1f\n\n",maxx);
			continue;
		}
		if(ret<minx) {
			fprintf(stdout,"ERROR: Min. value=%.1f\n\n",minx);
			continue;
		}
		break;
	}
	return (double)ret;
}


/*
*	getint()
*
*	Prompts on stdout with "instr", the allowed range and the default,
*	then reads an integer from stdin.  The prompt is repeated until the
*	value lies within [minx, maxx].
*
*	Return value:
*		The number entered, or "def" when the line is blank or when
*		stdin is at end of input / fails.
*
*	Input is read with fgets() (bounded by the size of the line buffer)
*	instead of gets().  A line that does not start with an integer is
*	rejected and the prompt repeated, rather than comparing an
*	uninitialised value against the limits.
*/
int getint(char *instr,int minx,int maxx, int def)
{
	int ret,status;
	char line[MAXLINE];	

	while(TRUE) {
		fprintf(stdout,"%s (%d..%d)    [%d]: ",
		instr,(pint)minx,(pint)maxx,(pint)def);
		if(fgets(line,sizeof line,stdin) == NULL)
			return def;	/* end of input or read error */
		if(strchr(line,'\n') == NULL) {
			/* over-long line: discard the tail so it is not read as the next answer */
			int c;
			while((c=getchar()) != '\n' && c != EOF)
				;
		}
		status=sscanf(line,"%d",&ret);
		if(status == EOF) return def;	/* blank line: take the default */
		if(status != 1) {
			fprintf(stdout,"ERROR: Invalid number\n\n");
			continue;
		}
		if(ret>maxx) {
			fprintf(stdout,"ERROR: Max. value=%d\n\n",(pint)maxx);
			continue;
		}
		if(ret<minx) {
			fprintf(stdout,"ERROR: Min. value=%d\n\n",(pint)minx);
			continue;
		}
		break;
	}
	return ret;
}

/*
*	linetype()
*
*	Tests whether "line" begins with the string "code".
*	Return value:
*		Non-zero when the first strlen(code) characters of "line"
*		equal "code", zero otherwise.
*/
Boolean linetype(char *line,char *code)
{
	const size_t prefix_len = strlen(code);
	const int differs = strncmp(line,code,prefix_len);

	return( differs == 0 );
}

/*
*	keyword()
*
*	Tests whether the first white-space delimited word of "line" is
*	exactly "code".  Leading white space in "line" is skipped.
*	Return value:
*		Non-zero on a match, zero otherwise.
*
*	The word is compared in place.  It is no longer copied into a
*	fixed-size buffer, which a word of MAXLINE or more characters
*	would have overflowed.
*/
Boolean keyword(char *line,char *code)
{
	const char *word = line;
	size_t wordlen = 0;

	/* the unsigned char casts keep isspace() defined for bytes >= 0x80 */
	while (*word != '\0' && isspace((unsigned char)*word))
		word++;
	while (word[wordlen] != '\0' && !isspace((unsigned char)word[wordlen]))
		wordlen++;

	return( wordlen == strlen(code) && strncmp(word,code,wordlen) == 0 );
}

/*
*	blankline()
*
*	Scans "line" up to the first newline or the end of the string.
*	Return value:
*		TRUE when every character scanned is a digit, white space,
*		'*', ':' or '.' (an empty line also qualifies);
*		FALSE as soon as any other character is met.
*/
Boolean blankline(char *line)
{
	char *p;

	for (p = line; *p != '\n' && *p != EOS; p++) {
		if (isdigit(*p) || isspace(*p))
			continue;
		if (*p == '*' || *p == ':' || *p == '.')
			continue;
		return FALSE;
	}

	return TRUE;
}


/*
*	get_path()
*
*	Copies "str" into "path" and makes the copy end in a '.':
*	  - scanning backwards, if a '.' is met before any DIRDELIM, the
*	    copy is cut just after that '.';
*	  - otherwise (a DIRDELIM comes first, or neither is present) a '.'
*	    is appended to the full copy.
*	"path" must have room for strlen(str) + 2 bytes.
*/
void get_path(char *str,char *path)
{
	int pos;
	int dot_at = -1;	/* index of the '.' that ends the stem, -1 if none */

	strcpy(path,str);

	for (pos = strlen(path)-1; pos >= 0; pos--) {
		if (str[pos] == DIRDELIM)
			break;
		if (str[pos] == '.') {
			dot_at = pos;
			break;
		}
	}

	if (dot_at < 0)
		strcat(path,".");
	else
		path[dot_at+1] = EOS;
}


/*
*	alloc_aln()
*
*	Allocates the per-sequence arrays of an alignment (seqs, ft, repeat
*	and go), each with nseqs+1 zero-filled entries, and gives every one
*	of the first "nseqs" sequences empty name/access/nid/title/org
*	buffers and default field values.  The alignment-wide fields are
*	then reset; in particular mult_aln->nseqs is set to 0, not to "nseqs".
*/
void alloc_aln(sint nseqs,ALNPTR mult_aln)
{
	sint s,t;
	const size_t slots = nseqs+1;

	mult_aln->seqs = (SEQ *)ckalloc(slots * sizeof (SEQ));
	mult_aln->ft = (FT *)ckalloc(slots * sizeof (FT));
	mult_aln->repeat = (REP *)ckalloc(slots * sizeof (REP));
	mult_aln->go = (GO *)ckalloc(slots * sizeof (GO));

	for (s = 0; s < nseqs; s++) {
		SEQ *seq = &mult_aln->seqs[s];

		/* fixed-size text buffers */
		seq->name = (char *)ckalloc((MAXNAMES+1) * sizeof (char));
		seq->access = (char *)ckalloc((MAXNAMES+1) * sizeof (char));
		seq->nid = (char *)ckalloc((MAXNAMES+1) * sizeof (char));
		seq->title = (char *)ckalloc((MAXTITLES+1) * sizeof (char));
		seq->org = (char *)ckalloc((MAXORGANISMS+1) * sizeof (char));

		/* residue data is allocated later, see alloc_seq() */
		seq->data = NULL;
		seq->mask = NULL;

		seq->weight = 100;
		seq->len = 0;
		seq->sense = 0;
		seq->simgroup = 0;

		for (t = 0; t < MAXFTTYPE; t++)
			mult_aln->ft[s].nentries[t] = 0;
		mult_aln->repeat[s].nrepeats = 0;
		mult_aln->go[s].ngorefs = 0;
	}

	/* alignment-wide state */
	mult_aln->motifs = NULL;
	mult_aln->groups.ngroups = 0;

	mult_aln->nseqs = 0;
	mult_aln->ncol_scores = 0;
	mult_aln->nanchors = 0;
	mult_aln->dnaflag = FALSE;
	strcpy(mult_aln->alphabet,"ABCDEFGHIJKLMNOPQRSTUVWXYZ");

	mult_aln->prf1.nseqs = 0;
	mult_aln->prf2.nseqs = 0;
	mult_aln->treename[0] = '\0';
	mult_aln->prf1.treename[0] = '\0';
	mult_aln->prf2.treename[0] = '\0';
}

/*
*	realloc_aln()
*
*	Grows the per-sequence arrays of an alignment (seqs, ft, repeat, go)
*	to first_seq+nseqs+1 entries and initialises the entries
*	first_seq .. first_seq+nseqs-1 the same way alloc_aln() does.
*
*	"sense" is now reset as well, to match alloc_aln(): realloc() does
*	not zero the added memory, so the field was previously left
*	indeterminate for the new sequences.
*	NOTE(review): any other SEQ/FT/REP/GO fields that alloc_aln() gets
*	zeroed for free from ckalloc() are still not cleared here - confirm
*	whether callers rely on them.
*/
void realloc_aln(sint first_seq,sint nseqs,ALNPTR mult_aln)
{
	sint i,j;

	mult_aln->seqs = (SEQ *)ckrealloc(mult_aln->seqs,(first_seq+nseqs+1) * sizeof (SEQ));
	mult_aln->ft = (FT *)ckrealloc(mult_aln->ft,(first_seq+nseqs+1) * sizeof (FT));
	mult_aln->repeat = (REP *)ckrealloc(mult_aln->repeat,(first_seq+nseqs+1) * sizeof (REP));
	mult_aln->go = (GO *)ckrealloc(mult_aln->go,(first_seq+nseqs+1) * sizeof (GO));
	for(i=first_seq;i<first_seq+nseqs;i++) {
		mult_aln->seqs[i].name = (char *)ckalloc((MAXNAMES+1) * sizeof (char));
		mult_aln->seqs[i].access = (char *)ckalloc((MAXNAMES+1) * sizeof (char));
		mult_aln->seqs[i].nid = (char *)ckalloc((MAXNAMES+1) * sizeof (char));
		mult_aln->seqs[i].title = (char *)ckalloc((MAXTITLES+1) * sizeof (char));
		mult_aln->seqs[i].org = (char *)ckalloc((MAXORGANISMS+1) * sizeof (char));
		mult_aln->seqs[i].data=NULL;
		mult_aln->seqs[i].mask=NULL;
		mult_aln->seqs[i].weight=100;
		mult_aln->seqs[i].len=0;
		mult_aln->seqs[i].sense=0;
		mult_aln->seqs[i].simgroup=0;
		for(j=0;j<MAXFTTYPE;j++)
			mult_aln->ft[i].nentries[j]=0;
		mult_aln->repeat[i].nrepeats=0;
		mult_aln->go[i].ngorefs=0;
	}
}

/*
*	free_aln()
*
*	Releases the memory owned by an alignment: for each of the
*	mult_aln->nseqs sequences the text buffers (name, access, nid,
*	title, org), the residue data and mask, the type/name strings of
*	every feature entry and the id/desc strings of every GO reference;
*	then the seqs, ft, repeat and go arrays themselves.
*	Does nothing when mult_aln->nseqs is not positive.
*
*	The access and nid buffers are allocated together with name in
*	alloc_aln()/realloc_aln() but were never released here; they are
*	now freed as well.
*
*	The pointers inside mult_aln are not reset.  ckfree() issues a
*	warning for each NULL pointer it is handed (e.g. a sequence whose
*	data/mask was never allocated).
*/
void free_aln(ALNPTR mult_aln)
{
	sint i,j,k;

	if(mult_aln->nseqs<=0) return;

	for(i=0;i<mult_aln->nseqs;i++) {
		ckfree(mult_aln->seqs[i].name);
		ckfree(mult_aln->seqs[i].access);
		ckfree(mult_aln->seqs[i].nid);
		ckfree(mult_aln->seqs[i].title);
		ckfree(mult_aln->seqs[i].org);
		ckfree(mult_aln->seqs[i].data);
		ckfree(mult_aln->seqs[i].mask);
		for(j=0;j<MAXFTTYPE;j++) {
			for(k=0;k<mult_aln->ft[i].nentries[j];k++) {
				ckfree(mult_aln->ft[i].data[j][k].type);
				ckfree(mult_aln->ft[i].data[j][k].name);
			}
		}
		for(k=0;k<mult_aln->go[i].ngorefs;k++) {
			ckfree(mult_aln->go[i].goref[k].id);
			ckfree(mult_aln->go[i].goref[k].desc);
		}
	}
	ckfree(mult_aln->seqs);
	ckfree(mult_aln->ft);
	ckfree(mult_aln->repeat);
	ckfree(mult_aln->go);
}
/*
*	alloc_seq()
*
*	Allocates zero-filled data and mask buffers of length+2 bytes each
*	for one sequence.
*/
void alloc_seq(SEQ *seq,sint length)
{
	const size_t nbytes = (length+2) * sizeof (char);

	seq->data = (char *)ckalloc(nbytes);
	seq->mask = (char *)ckalloc(nbytes);
}

/*
*	realloc_seq()
*
*	Resizes the data and mask buffers of one sequence to length+2 bytes
*	each.  Both buffers must already have been allocated, because
*	ckrealloc() rejects a NULL pointer.
*/
void realloc_seq(SEQ *seq,sint length)
{
	const size_t nbytes = (length+2) * sizeof (char);

	seq->data = (char *)ckrealloc(seq->data, nbytes);
	seq->mask = (char *)ckrealloc(seq->mask, nbytes);
}

/*
*	alloc_ft_entry()
*
*	Allocates the zero-filled text buffers of one feature entry:
*	12 bytes for the type and 102 bytes for the name.
*/
void alloc_ft_entry(FT_ENTRY *data)
{
	enum { TYPE_CHARS = 10, NAME_CHARS = 100 };

	data->type = (char *)ckalloc((TYPE_CHARS+2) * sizeof (char));
	data->name = (char *)ckalloc((NAME_CHARS+2) * sizeof (char));
}

/*
*	alloc_taxon_entry()
*
*	Kept for interface compatibility; performs no allocation.
*
*	"taxon" is passed by value, so a buffer allocated here could never
*	be handed back to the caller.  The previous body allocated 102 bytes
*	on every call and stored the address only in its own copy of the
*	parameter, which leaked the block and left the caller's pointer
*	untouched.  The allocation has been removed to stop the leak; what
*	the caller observes is unchanged.
*
*	NOTE(review): if callers really expect a freshly allocated buffer,
*	the signature has to become (char **taxon), or the function has to
*	return the pointer - that is an interface change and must be made
*	together with the call sites.
*/
void alloc_taxon_entry(char *taxon)
{
	(void)taxon;
}

/*
*	alloc_go_entry()
*
*	Allocates the zero-filled text buffers of one GO reference:
*	12 bytes for the id and 102 bytes for the description.
*/
void alloc_go_entry(GOREF *goref)
{
	enum { ID_CHARS = 10, DESC_CHARS = 100 };

	goref->id = (char *)ckalloc((ID_CHARS+2) * sizeof (char));
	goref->desc = (char *)ckalloc((DESC_CHARS+2) * sizeof (char));
}

/*
*	getargs()
*
*	Splits "inline1" in place into tokens separated by spaces, tabs or
*	newlines (using strtok) and stores a pointer to each token in args[].
*
*	Slots args[0] .. args[max] may be written, so the array needs room
*	for max+1 pointers.  When the input runs out early, the slot after
*	the last token is set to NULL.
*
*	Return value:
*		Number of tokens stored (at most max+1).
*/
int getargs(char *inline1,char *args[],int max)
{
	static const char separators[] = " \t\n";
	char *source = inline1;	/* NULL after the first strtok() call */
	int count = 0;

	while (count <= max) {
		args[count] = strtok(source,separators);
		if (args[count] == NULL)
			break;
		source = NULL;
		count++;
	}

	return(count);
}

/*
*	getintargs()
*
*	Splits "inline1" in place into tokens separated by spaces, tabs or
*	newlines (using strtok), converts each token with atoi() and stores
*	the result in args[].
*
*	Slots args[0] .. args[max] may be written, so the array needs room
*	for max+1 values.
*
*	Return value:
*		Number of values stored (at most max+1).
*/
int getintargs(char *inline1,sint *args,int max)
{
	static const char separators[] = " \t\n";
	char *source = inline1;	/* NULL after the first strtok() call */
	char *token;
	int count = 0;

	while (count <= max) {
		token = strtok(source,separators);
		if (token == NULL)
			break;
		args[count] = atoi(token);
		source = NULL;
		count++;
	}

	return(count);
}


/*
   Percent identity between two sequences.

   Positions are compared up to the length of the shorter sequence.
   Only positions where both sequences hold a letter are counted; the
   result is 100 * (identical letters) / (positions counted), or 0 when
   no position qualifies.
	seq1	first sequence
	seq2	second sequence
*/
float countid(SEQ seq1,SEQ seq2)
{
   sint pos;
   sint same = 0;
   sint aligned = 0;

   for (pos = 0; pos < seq1.len && pos < seq2.len; pos++) {
     const char a = seq1.data[pos];
     const char b = seq2.data[pos];

     if (!isalpha(a) || !isalpha(b))
       continue;

     aligned++;
     if (a == b)
       same++;
   }

   if (aligned == 0)
     return 0;

   return 100.0 * (float)same / (float)aligned;
}

/*
   Percent identity between two sequences, relative to the number of
   occupied positions.

   Positions are compared up to the length of the shorter sequence.
   The result is 100 * (positions where both hold the same letter) /
   (positions where at least one sequence holds a letter), or 0 when
   no position is occupied.
        seq1    first sequence
        seq2    second sequence
*/

float countid1(SEQ seq1,SEQ seq2)
{
   sint pos;
   sint same = 0;
   sint occupied = 0;

   for (pos = 0; pos < seq1.len && pos < seq2.len; pos++) {
     const char a = seq1.data[pos];
     const char b = seq2.data[pos];
     const int a_is_letter = (isalpha(a) != 0);
     const int b_is_letter = (isalpha(b) != 0);

     if (a_is_letter || b_is_letter)
       occupied++;
     if (a_is_letter && b_is_letter && a == b)
       same++;
   }

   if (occupied == 0)
     return 0;

   return (float)(100.0 * (float)same / (float)occupied);
}


/*
*	open_explicit_file()
*
*	Opens "file_name" for writing.  An empty name, or a failure to
*	open the file, is reported through error().
*	Return value:
*		The open stream, or NULL on failure.
*/
FILE *open_explicit_file(char *file_name)
{
	FILE *out;

	if (file_name[0] == EOS) {
		error("Bad output file [%s]",file_name);
		return NULL;
	}

#ifdef VMS
	out = fopen(file_name,"w","rat=cr","rfm=var");
#else
	out = fopen(file_name,"w");
#endif
	if (out == NULL) {
		error("Cannot open output file [%s]",file_name);
		return NULL;
	}

	return out;
}

/*
*	pos2col()
*
*	Converts a range of residue positions into column indices of the
*	string "seq".  Residues are the alphabetic characters of "seq",
*	numbered from 0; columns are indices into "seq", also from 0.
*
*	*cstart receives the column of residue "pstart", *cend the column of
*	residue "pend".  Special cases:
*	  - pstart < 0: both outputs are set to -1;
*	  - pend <= pstart: *cend is set equal to *cstart;
*	  - a residue beyond the end of "seq": the output is the index at
*	    which the scan stopped, which is then not a valid column
*	    (strlen(seq) for *cstart; for *cend, strlen(seq), or
*	    strlen(seq)+1 when pstart was itself out of range).
*
*	The string length is computed once instead of on every loop test,
*	and bytes are cast to unsigned char before isalpha().
*/
void pos2col(char *seq,sint pstart,sint pend,sint *cstart,sint *cend)
{
        int i,ix;
        int len;

        ix=0;
        if(pstart<0)
        {
                (*cstart)=-1;
                (*cend)=-1;
                return;
        }
        len=(int)strlen(seq);
        for(i=0;i<len;i++)
        {
                if(isalpha((unsigned char)seq[i])) ix++;
                if(ix==pstart+1) break;
        }
        (*cstart)=i;

        if (pend<=pstart)
        {
                (*cend)=(*cstart);
                return;
        }

        /* continue the scan from the column after pstart */
        i++;
        for(;i<len;i++)
        {
                if(isalpha((unsigned char)seq[i])) ix++;
                if(ix==pend+1) break;
        }
        (*cend)=i;

}

/*
*	col2pos()
*
*	Converts a range of column indices of the string "seq" into residue
*	positions.  Residues are the alphabetic characters of "seq",
*	numbered from 0; columns are indices into "seq", also from 0.
*
*	*pstart receives the number of residues in columns 0..cstart minus
*	one, i.e. the position of the last residue at or before column
*	"cstart" (-1 if there is none).  *pend is the same for "cend".
*	Special cases:
*	  - cstart < 0: both outputs are set to -1;
*	  - cend <= cstart: *pend is set equal to *pstart;
*	  - a column beyond the end of "seq" yields the position of the
*	    last residue in the string.
*
*	The string length is computed once instead of on every loop test,
*	and bytes are cast to unsigned char before isalpha().
*/
void col2pos(char *seq,sint cstart,sint cend,sint *pstart,sint *pend)
{
        int i,ix;
        int len;

        ix=0;
        if(cstart<0)
        {
                (*pstart)=-1;
                (*pend)=-1;
                return;
        }
        len=(int)strlen(seq);
        for(i=0;i<len;i++)
        {
                if(isalpha((unsigned char)seq[i])) ix++;
                if(i==cstart) break;
        }
        (*pstart)=ix-1;

        if (cend<=cstart)
        {
                (*pend)=(*pstart);
                return;
        }

        /* continue counting residues from the column after cstart */
        i++;
        for(;i<len;i++)
        {
                if(isalpha((unsigned char)seq[i])) ix++;
                if(i==cend) break;
        }
        (*pend)=ix-1;

}

/*
*	overlap()
*
*	Number of positions shared by the inclusive ranges [f1,l1] and
*	[f2,l2].  The result is zero or negative when the ranges do not
*	intersect.
*/
sint overlap(sint f1,sint l1,sint f2, sint l2)
{
        const sint shared_first = (f1 > f2) ? f1 : f2;
        const sint shared_last = (l1 < l2) ? l1 : l2;

        return shared_last - shared_first + 1;
}

/*
*	check_ft_type()
*
*	Maps a feature type keyword to its numeric code.  Recognised
*	keywords: TRANSMEM, BLOCK, REGION, PHYLOBLOCK, SEQERR, COIL and
*	LOWCOMP (exact, case-sensitive match).  "ft_name" is not used.
*
*	Return value:
*		1 and the code stored in *type when the keyword is known;
*		0 with *type left untouched otherwise.
*/
sint check_ft_type(char *ft_type,char *ft_name,sint *type)
{
        const struct {
                const char *keyword;
                sint code;
        } known[] = {
                { "TRANSMEM",   TRANSMEM },
                { "BLOCK",      COREBLOCK },
                { "REGION",     REGION },
                { "PHYLOBLOCK", PHYLOBLOCK },
                { "SEQERR",     SEQERRBLOCK },
                { "COIL",       COIL },
                { "LOWCOMP",    LOWC }
        };
        const size_t nknown = sizeof known / sizeof known[0];
        size_t k;

        for (k = 0; k < nknown; k++) {
                if (strcmp(ft_type,known[k].keyword) == 0) {
                        (*type) = known[k].code;
                        return 1;
                }
        }

        return 0;
}

/*
*       fatal()
*
*       Prints "FATAL ERROR: " followed by the printf-style message to
*       stdout, then terminates the program with exit status 1.
*       Variadic parameter list can be passed.
*
*       Return values:
*               none (does not return)
*/

void fatal( char *msg,...)
{
        va_list args;

        fputs("\n\nFATAL ERROR: ",stdout);

        va_start(args,msg);
        vfprintf(stdout,msg,args);
        va_end(args);

        fputs("\n\n",stdout);
        exit(1);
}

/*
*       error()
*
*       Prints "ERROR: " followed by the printf-style message to stdout.
*       Variadic parameter list can be passed.
*
*       Return values:
*               none
*/

void error( char *msg,...)
{
        va_list args;

        fputs("\n\nERROR: ",stdout);

        va_start(args,msg);
        vfprintf(stdout,msg,args);
        va_end(args);

        fputs("\n\n",stdout);
}

/*
*       warning()
*
*       Prints "WARNING: " followed by the printf-style message to stdout.
*       Variadic parameter list can be passed.
*
*       Return values:
*               none
*/

void warning( char *msg,...)
{
        va_list args;

        fputs("\n\nWARNING: ",stdout);

        va_start(args,msg);
        vfprintf(stdout,msg,args);
        va_end(args);

        fputs("\n\n",stdout);
}

/*
*       info()
*
*       Prints a newline followed by the printf-style message to stdout.
*       No newline is added after the message.
*       Variadic parameter list can be passed.
*
*       Return values:
*               none
*/

void info( char *msg,...)
{
        va_list args;

        fputs("\n",stdout);

        va_start(args,msg);
        vfprintf(stdout,msg,args);
        va_end(args);
}

/*
*       common_denominator()
*
*       Fills node->taxon with a label describing what the sequences of
*       the group "node" have in common.
*
*       1. The life-domain counters of the node (arc, bac, euk, vir) are
*          incremented according to the first character of each member's
*          lifedomain (A, B, E or V, either case).  The counters are not
*          reset here.
*       2. The first member that has a taxon list is taken as reference;
*          if no member has one, the label is "Undetermined".
*       3. The reference's taxons are tried from last to first; the first
*          one that also occurs in the taxon list of every other member
*          with a non-empty list becomes the label.
*       4. If that leaves the label empty, it is built from the letters
*          A, B, E, V of the life domains whose counter is positive.
*/
void common_denominator(ALN mult_aln,GROUP_NODEPTR node)
{
        int member,cand,probe;
        int ref_seq = 0;
        int other_seq;
        Boolean found;
        Boolean shared;

        node->taxon[0]='\0';

        /* 1. tally the life domains of the members */
        for (member = 0; member < node->nseqs; member++) {
                switch (mult_aln.seqs[node->seqs[member]].lifedomain[0]) {
                        case 'A': case 'a':
                                node->arc++;
                                break;
                        case 'B': case 'b':
                                node->bac++;
                                break;
                        case 'E': case 'e':
                                node->euk++;
                                break;
                        case 'V': case 'v':
                                node->vir++;
                                break;
                }
        }

        /* 2. the first member with a taxon list becomes the reference */
        found = FALSE;
        for (member = 0; member < node->nseqs && found == FALSE; member++) {
                ref_seq = node->seqs[member];
                if (mult_aln.seqs[ref_seq].ntaxons > 0)
                        found = TRUE;
        }
        if (found == FALSE) {
                strcpy(node->taxon,"Undetermined");
                return;
        }

        /* 3. try each taxon of the reference, last one first */
        for (cand = mult_aln.seqs[ref_seq].ntaxons-1; cand >= 0; cand--) {
                shared = TRUE;
                for (member = 0; member < node->nseqs; member++) {
                        other_seq = node->seqs[member];
                        /* the reference itself and members without taxons do not vote */
                        if (other_seq == ref_seq || mult_aln.seqs[other_seq].ntaxons == 0)
                                continue;

                        found = FALSE;
                        for (probe = mult_aln.seqs[other_seq].ntaxons-1; probe >= 0; probe--) {
                                if (strcmp(mult_aln.seqs[ref_seq].taxon[cand],mult_aln.seqs[other_seq].taxon[probe]) == 0) {
                                        found = TRUE;
                                        break;
                                }
                        }
                        if (found == FALSE) {
                                shared = FALSE;
                                break;
                        }
                }
                if (shared == TRUE) {
                        strcpy(node->taxon,mult_aln.seqs[ref_seq].taxon[cand]);
                        break;
                }
        }

        /* 4. nothing in common: fall back to the life domains */
        if (node->taxon[0] == '\0') {
                if (node->arc > 0) strcat(node->taxon,"A");
                if (node->bac > 0) strcat(node->taxon,"B");
                if (node->euk > 0) strcat(node->taxon,"E");
                if (node->vir > 0) strcat(node->taxon,"V");
        }
}

/*
*	col2pos1()
*
*	Converts one column index of the string "seq" into a residue
*	position.  Residues are the alphabetic characters of "seq",
*	numbered from 0.
*
*	*pstart receives the number of residues in columns 0..cstart minus
*	one (-1 if there is none), or -1 when cstart < 0.  A column beyond
*	the end of "seq" yields the position of the last residue.
*
*	The string length is computed once instead of on every loop test,
*	and bytes are cast to unsigned char before isalpha().
*/
void col2pos1(char *seq,sint cstart,sint *pstart)
{
        int i,ix;
        int len;

        ix=0;
        if(cstart<0)
        {
                (*pstart)=-1;
                return;
        }
        len=(int)strlen(seq);
        for(i=0;i<len;i++)
        {
                if(isalpha((unsigned char)seq[i])) ix++;
                if(i==cstart) break;
        }
        (*pstart)=ix-1;
}

/*
*	pos2col1()
*
*	Converts one residue position into a column index of the string
*	"seq".  Residues are the alphabetic characters of "seq", numbered
*	from 0.
*
*	*cstart receives the column of residue "pstart", or -1 when
*	pstart < 0.  When "seq" holds fewer than pstart+1 residues the
*	result is strlen(seq), which is not a valid column.
*
*	The string length is computed once instead of on every loop test,
*	and bytes are cast to unsigned char before isalpha().
*/
void pos2col1(char *seq,sint pstart,sint *cstart)
{
        int i,ix;
        int len;

        ix=0;
        if(pstart<0)
        {
                (*cstart)=-1;
                return;
        }
        len=(int)strlen(seq);
        for(i=0;i<len;i++)
        {
                if(isalpha((unsigned char)seq[i])) ix++;
                if(ix==pstart+1) break;
        }
        (*cstart)=i;

}

