#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <math.h>
#include "leon.h"

/* Minimum / maximum of two values.
 * Each argument appears twice in the expansion, so arguments with side
 * effects (e.g. i++) may be evaluated more than once. */
#define MIN(a,b) ((a)<(b)?(a):(b))
#define MAX(a,b) ((a)>(b)?(a):(b))

/* NOTE(review): these two file-local prototypes have no definition or call
 * in the visible part of this file -- confirm they are still needed. */
static void subgroup_blocks(char *infile,char *clusterfile,sint window,sint query_group) ;
static void all_blocks(sint window);

/* Defined in another translation unit; not referenced by main() below.
 * NOTE(review): presumably the Gonnet 250 substitution matrix -- confirm. */
extern sint    gon250mt[];

/* Program-wide state, filled in by main(). */
GROUP *groups;          /* group array; allocated with 2*nseqs entries, filled by read_secator_groups() */
sint ngroups;           /* return value of read_secator_groups() (main exits if <= 0) */
ALN mult_aln;           /* the input alignment, loaded by seq_input() */
sint *secgroup;         /* nseqs+1 entries; passed to read_secator_groups(), then freed in main() */
sint *orggroup;         /* nseqs+1 entries; passed to read_secator_groups(), then freed in main() */
COMP_MATRIX matrix;     /* not referenced by main() below */
double **tmat;          /* pairwise matrix: (100 - countid1(i,j)) / 100 for every sequence pair */

/*
 * Copy a command-line argument into a fixed-size buffer.
 *
 * dst      destination buffer
 * dstsize  total size of dst in bytes (including room for the terminator)
 * src      NUL-terminated argument string
 * what     short description of the argument, used in the error message
 *
 * Terminates the program with exit status 1 if src does not fit, instead of
 * silently writing past the end of dst.
 */
static void copy_cmdline_arg(char *dst,size_t dstsize,const char *src,const char *what)
{
	size_t len=strlen(src);

	if(len>=dstsize) {
		fprintf(stdout,"Error: %s is too long (maximum %lu characters)\n",
			what,(unsigned long)(dstsize-1));
		exit(1);
	}
	memcpy(dst,src,len+1);
}

/*
 * Print the sequence clusters of an alignment.
 *
 * Usage: prog input_aln query cluster_file
 *
 *   input_aln     alignment file, read with seq_input()
 *   query         name of a reference sequence that must be present in the
 *                 alignment (compared case-insensitively)
 *   cluster_file  cluster definition file, read with read_secator_groups()
 *
 * Output (stdout): the number of groups with more than one member, each such
 * group with its member names, and finally all single-member groups listed
 * together as "unclustered".
 *
 * Returns 0 on success; exits with status 1 on a usage error, an empty
 * alignment, an unknown query sequence, or when no groups could be read.
 */
int main(int argc,char **argv)
{
	sint i,j,n;
	sint norphans;
	sint query;
	char infile[FILENAMELEN+1];
	char query_name[FILENAMELEN+1];
	char clusterfile[FILENAMELEN+1];
	double dscore;
	OPT opt;
	sint *orphans;
	float cutoff;

	if(argc!=4) {
		fprintf(stdout,"Usage: %s input_aln query cluster_file\n",argv[0]);
		exit(1);
	}

/* arguments come from the user: refuse anything that would overflow */
	copy_cmdline_arg(infile,sizeof(infile),argv[1],"input alignment file name");
	copy_cmdline_arg(query_name,sizeof(query_name),argv[2],"query sequence name");
	copy_cmdline_arg(clusterfile,sizeof(clusterfile),argv[3],"cluster file name");

	init_options(&opt);

/* read in the sequences */
	seq_input(infile,opt.explicit_type,FALSE,&mult_aln);
	if(mult_aln.nseqs<=0) {
		error("No sequences in %s\n",infile);
		exit(1);
	}

/* find the query sequence (if the name occurs more than once, the last match wins) */
	query=(-1);
	for(i=0;i<mult_aln.nseqs;i++) {
		if(strcasecmp(query_name,mult_aln.seqs[i].name)==0) query=i;
	}
	if(query==-1) {
		fprintf(stdout,"Error: the specified reference sequence (%s) was not found in the alignment\n",query_name);
		exit(1);
	}

/* count pairwise residue percent identities, stored as distances in [0,1] */
	tmat = (double **) ckalloc( (mult_aln.nseqs+1) * sizeof (double *) );
	for(i=0;i<mult_aln.nseqs;i++)
		tmat[i] = (double *)ckalloc( (mult_aln.nseqs+1) * sizeof (double) );

	for(i=0;i<mult_aln.nseqs;i++) {
		/* a sequence is at distance 0 from itself; do not leave the
		   diagonal uninitialised for the code that consumes tmat */
		tmat[i][i] = 0.0;
		for(j=i+1;j<mult_aln.nseqs;j++) {
			dscore = countid1(mult_aln.seqs[i],mult_aln.seqs[j]);
			tmat[j][i] = tmat[i][j] = (100.0 - dscore)/100.0;
		}
	}

/* read in the clusters */
	groups=(GROUP *)ckalloc((mult_aln.nseqs*2) * sizeof(GROUP));
	secgroup=(sint *)ckalloc((mult_aln.nseqs+1)*sizeof(sint));
	orggroup=(sint *)ckalloc((mult_aln.nseqs+1)*sizeof(sint));

	cutoff=0.3f;

	ngroups=read_secator_groups(mult_aln,tmat,clusterfile,groups,secgroup,orggroup,cutoff);
	if(ngroups<=0) exit(1);

/* the per-sequence group tables are not needed any more; clear the globals
   so that nothing can use the freed memory by accident */
	ckfree(secgroup);
	secgroup=NULL;
	ckfree(orggroup);
	orggroup=NULL;

/* collect the orphans (groups with a single member) and count the real clusters */
	orphans=(sint *)ckalloc((mult_aln.nseqs+1)*sizeof(sint));
	norphans=0;
	n=0;
	for(i=0;i<ngroups;i++) {
		if(groups[i].len==1)
			orphans[norphans++]=groups[i].seqs[0];
		else if(groups[i].len>1)
			n++;
	}
	fprintf(stdout,"Number of clusters : %d\n\n",(int)n);

/* list every cluster with its members; clusters are numbered from 0 */
	n=0;
	for(i=0;i<ngroups;i++) {
		if(groups[i].len>1) {
			fprintf(stdout,"Cluster %d ; size=%d\n",(int)n,(int)groups[i].len);
			for(j=0;j<groups[i].len;j++) {
				fprintf(stdout,"%s\n",mult_aln.seqs[groups[i].seqs[j]].name);
			}
			fprintf(stdout,"\n");
			n++;
		}
	}

/* all single-sequence groups are reported together */
	if(norphans>0) {
		fprintf(stdout,"unclustered ; size=%d\n",(int)norphans);
		for(j=0;j<norphans;j++) {
			fprintf(stdout,"%s\n",mult_aln.seqs[orphans[j]].name);
		}
	}

	ckfree(orphans);

	return 0;
}
 

