/* 
 * linuxfr-captcha : breaks linuxfr captchas
 *
 * That's a quick hack, don't expect anything from it :)
 * http://linuxfr.org/user_new.html
 * http://linuxfr.org/image.png
 *
 * To compile : 
 * g++ -O2 captcha.c `sdl-config --libs --cflags` -lSDL_image -o captcha
 * 
 * Copyright (C) 2005 Stephane Marchesin
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "SDL.h"
#include "SDL_image.h"

// One candidate letter located in the picture.
typedef struct
{
	// match quality as returned by do_match(); a negative value marks a slot
	// that holds no letter yet
	float score;
	// glyph index: 0-25 are upper case, 26-51 are lower case
	int letter;
	// picture coordinates that were handed to do_match() for this candidate
	int xpos;
	int ypos;
}
match;

// Size in pixels of the glyph area that do_match() compares.
#define LETTER_WIDTH 8
#define LETTER_HEIGHT 12
// Number of match slots kept per scanned row, and length of the printed result.
#define LETTERS 7
// Pixel value written over long horizontal runs of text-colored pixels;
// do_match() ignores every picture pixel equal to it.
#define DOUBTFUL 0x80808080

// finds the worse of our matches
inline int worst_match(match* l,int nb)
{
	int sm=0;
	for(int i=1;i<nb;i++)
	{
		if (l[i].score<l[sm].score)
			sm=i;
	}
	return sm;
}

// Converts a glyph index into the character it stands for.
//
// n : glyph index, 0-25 for 'A'-'Z' and 26-51 for 'a'-'z'
//
// Returns the matching character, or '?' when n is outside 0-51 (for example
// the index of a slot that never received a letter).
//
// The function is static on purpose: in C99 and later a plain `inline`
// definition provides no external definition, so linking fails whenever the
// compiler decides not to inline a call.
static inline char numtochar(int n)
{
	if (n<0 || n>=2*26)
		return '?';
	if (n<26)
		return (char)(n+'A');
	return (char)(n-26+'a');
}

// Scores glyph n of the font sheet against the picture area whose top-left
// corner is (px,py).
//
// n      : glyph index; 0-25 come from the first row of the sheet, 26-51 from
//          the row starting LETTER_HEIGHT pixels lower
// a      : font sheet, read as 32-bit pixels
// pic    : picture to search, read as 32-bit pixels
// px, py : top-left corner of the compared area inside pic
//
// Returns the number of agreeing pixels divided by the glyph area, or -10 as
// soon as a pixel that is set in the font is not set in the picture.
// No bounds checking is done here; both areas must lie inside their surface.
float do_match(int n,SDL_Surface* a,SDL_Surface* pic,int px,int py)
{
	const Uint32* font_px=(const Uint32*)a->pixels;
	const Uint32* pic_px=(const Uint32*)pic->pixels;

	// glyphs are laid out every LETTER_WIDTH+2 pixels, 26 per row
	const int glyph_x=(n%26)*(LETTER_WIDTH+2);
	const int glyph_y=(n<26)?0:LETTER_HEIGHT;

	int hits=0;
	for(int row=0;row<LETTER_HEIGHT;row++)
	{
		for(int col=0;col<LETTER_WIDTH;col++)
		{
			Uint32 target=pic_px[(px+col)+(py+row)*pic->w];

			// doubtful pixels take no part in the comparison
			if (target==DOUBTFUL)
				continue;

			// only the low 24 bits of each pixel are compared
			target&=0xffffff;
			Uint32 ink=font_px[(glyph_x+col)+(glyph_y+row)*a->w]&0xffffff;

			// set in the font but not in the picture: cannot be this glyph
			if (ink==0 && target!=0)
				return -10.f;

			if (ink==target)
				hits++;
		}
	}

	return (float)hits/(float)(LETTER_WIDTH*LETTER_HEIGHT);
}

// Looks for an already stored match that horizontally overlaps a letter
// placed at column x.
//
// a, y      : unused
// x         : left column of the candidate letter
// matchlist : array of LETTERS slots; slots with a negative score are skipped
//
// Returns the index of the first overlapping slot, or -1 when there is none.
int overlap(int a,int x,int y,match* matchlist)
{
	(void)a;
	(void)y;

	for(int idx=0;idx<LETTERS;idx++)
	{
		const match* slot=&matchlist[idx];
		if (!(slot->score<0.))
		{
			// two LETTER_WIDTH-wide spans overlap exactly when their left
			// edges are less than LETTER_WIDTH apart
			int delta=x-slot->xpos;
			if (delta>-LETTER_WIDTH && delta<LETTER_WIDTH)
				return idx;
		}
	}
	return -1;
}

int compare(const void* m1,const void* m2)
{
	if (((match*)m1)->xpos<((match*)m2)->xpos)
		return -1;
	else
		return 1;
}

main(int argc, char* argv[])
{
	SDL_Init(SDL_INIT_EVERYTHING);

	if (argc<2)
	{
		printf("Usage : %s file.png\n",argv[0]);
		exit(0);
	}
	SDL_Surface* l;
	l = IMG_Load(argv[1]);
	SDL_Surface* s=SDL_CreateRGBSurface(0,l->w,l->h,32,0xff000000, 0x00ff0000,0x0000ff00, 0x000000ff);
	SDL_BlitSurface(l,NULL,s,NULL);

	// load the font
	SDL_Surface* alphabet=IMG_Load("font.png");

	// find the darkest color, i.e. the text color
	int min=256*3;
	int x,y,a;
	for(y=0;y<s->h;y++)
		for(x=0;x<s->w;x++)
		{
			Uint8 r,g,b;
			SDL_GetRGB(((Uint32*)s->pixels)[x+y*s->w],s->format,&r,&g,&b);
			if (r+g+b<min)
			{
				min=r+g+b;
			}
		}
	SDL_Surface* s2=SDL_CreateRGBSurface(0,s->w,s->h,32,0xff000000, 0x00ff0000,0x0000ff00, 0x000000ff);
	// threshold
	for(y=0;y<s->h;y++)
		for(x=0;x<s->w;x++)
		{
			Uint8 r,g,b;
			SDL_GetRGB(((Uint32*)s->pixels)[x+y*s->w],s->format,&r,&g,&b);
			if (r+g+b<=min)
				((Uint32*)s2->pixels)[x+y*s2->w]=0x00000000;
			else
				((Uint32*)s2->pixels)[x+y*s2->w]=0xffffffff;
		}
	// find horizontal lines of more than LETTER_WIDTH+2 pixels (they mess up with a small number of cases) and mark them as doubtful
	int linesize;
	for(y=0;y<s->h;y++)
	{
		linesize=0;
		for(x=0;x<s->w;x++)
		{
			if (((Uint32*)s2->pixels)[x+y*s2->w]==0)
				linesize++;
			else
			{
				// found one ? if so, mark it
				if (linesize>LETTER_WIDTH+2)
				{
					for(int ii=1;ii<=linesize;ii++)
					{
						((Uint32*)s2->pixels)[x+y*s2->w-ii]=DOUBTFUL;
					}
				}
				linesize=0;
			}
		}
		if (linesize>LETTER_WIDTH+2)
		{
			for(int ii=1;ii<=linesize;ii++)
			{
				((Uint32*)s2->pixels)[x+y*s2->w-ii]=DOUBTFUL;
			}
		}
	}

	// find the letters (we use a brute force approach)
	match matchlist[LETTERS];

	float best_score=-100000.f;
	char resu[LETTERS+1];
	resu[LETTERS]=0;

	// check each line separately
	for(y=0;y<s2->h-LETTER_HEIGHT;y++)
	{
		for(int ii=0;ii<LETTERS;ii++)
			matchlist[ii].score=-1.f;

		for(x=0;x<s2->w-LETTER_WIDTH;x++)
		{
			for(a=0;a<2*26;a++)
			{
				float m=do_match(a,alphabet,s2,x,y);
				// maybe we overlap with someone else, in which case we only keep the best of the 2
				int o=overlap(a,x,y,matchlist);
				if (o>=0)
				{
					if (m>matchlist[o].score)
					{
						matchlist[o].score=m;
						matchlist[o].xpos=x;
						matchlist[o].ypos=y;
						matchlist[o].letter=a;
					}
				}
				else
				{
					int worst=worst_match(matchlist,LETTERS);
					if (m>matchlist[worst].score)
					{
						matchlist[worst].score=m;
						matchlist[worst].xpos=x;
						matchlist[worst].ypos=y;
						matchlist[worst].letter=a;
					}
				}
			}
			// we have at least 2 letters matched so we can use letter-wide steps
			if (matchlist[2].score>0.f)
				x+=LETTER_WIDTH;
		}
		qsort(matchlist,LETTERS,sizeof(match),compare);

		float score=0;
		for(int iii=0;iii<LETTERS;iii++)
			score+=matchlist[iii].score;

		if (score>best_score)
		{
			best_score=score;
			for(int ii=0;ii<LETTERS;ii++)
				resu[ii]=numtochar(matchlist[ii].letter);
		}
	}	

	printf("%s\n",resu);

	SDL_Quit();
}


