proboard/fuzzy2.cpp

294 lines
6.5 KiB
C++

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//-------------------
// Local, static data
//-------------------
static char *Text; // pointers to search string
static char *Pattern; // pointers to search string
static int Textloc; // current search position in Text
static int Plen; // length of Pattern
static int Degree; // max degree of allowed mismatch
static int *Ldiff; // dynamic difference array
static int *Rdiff; // dynamic difference array
static int *Loff; // used to calculate start of match
static int *Roff; // used to calculate start of match
static int *allocated;
static void near
App_init( char *pattern,
char *text,
int degree )
{
int i;
//----------------
// save parameters
//----------------
Text = text;
Pattern = pattern;
Degree = degree;
//-----------
// initialize
//-----------
Plen = strlen( pattern );
Ldiff = allocated = (int *) malloc( sizeof( int ) *
( Plen + 1 ) * 4 );
Rdiff = Ldiff + Plen + 1;
Loff = Rdiff + Plen + 1;
Roff = Loff + Plen + 1;
for ( i = 0; i <= Plen; i++ )
{
//-------------------------------------
// initial values for right-hand column
//-------------------------------------
Rdiff[ i ] = i;
Roff [ i ] = 1;
}
//-------------------------
// current offset into Text
//-------------------------
Textloc = -1;
}
static void near
App_next( char **start,
char **end,
int *howclose )
{
int *temp;
int a;
int b;
int c;
int i;
*start = NULL;
while ( *start == NULL )
{
//------------------------
// start computing columns
//------------------------
if ( Text[ ++Textloc ] == '\0' )
{
//-----------------------
// out of text to search!
//-----------------------
break;
}
temp = Rdiff; // move right-hand column to left >>
Rdiff = Ldiff; // so that we can compute new >>
Ldiff = temp; // right-hand column
Rdiff[ 0 ] = 0; // top (boundary) row
//----------------------------
// and swap offset arrays, too
//----------------------------
temp = Roff;
Roff = Loff;
Loff = temp;
Roff[ 1 ] = 0;
for ( i = 0; i < Plen; i++ )
{
//====================
// Run through pattern
//====================
//--------------------------------------------------
// compute a, b, & c as the three adjacent cells ...
//--------------------------------------------------
if ( toupper( Pattern[ i ] ) == toupper( Text[ Textloc ] ) )
{
a = Ldiff[ i ];
}
else
{
a = Ldiff[ i ] + 1;
}
b = Ldiff[ i + 1 ] + 1;
c = Rdiff[ i ] + 1;
//-----------------
// now pick minimum
//-----------------
if ( b < a )
{
a = b;
}
if ( c < a )
{
a = c;
}
//----------
// and store
//----------
Rdiff[ i + 1] = a;
}
//========================
// Now update offset array
//========================
//-------------------------------------------------
// the values in the offset arrays are added to the
// current location to determine the beginning of
// the mismatched substring. (see text for details)
//-------------------------------------------------
if ( Plen > 1 )
{
for ( i = 2; i <= Plen; i++ )
{
if ( Ldiff[ i - 1 ] < Rdiff[ i ] )
{
Roff[ i ] = Loff[ i - 1 ] - 1;
}
else if ( Rdiff[ i - 1 ] < Rdiff[ i ] )
{
Roff[ i ] = Roff[ i - 1 ];
}
else if ( Ldiff[ i ] < Rdiff[ i ] )
{
Roff[ i ] = Loff[ i ] - 1;
}
else
{
// Ldiff[ i-1] == Rdiff[i]
Roff[ i ] = Loff[ i - 1 ] - 1;
}
}
}
//--------------------------------------
// now, do we have an approximate match?
//--------------------------------------
if ( Rdiff[ Plen ] <= Degree )
{
//-----------
// indeed so!
//-----------
*end = Text + Textloc;
*start = *end + Roff[ Plen ];
*howclose = Rdiff[ Plen ];
}
}
}
void
App_End()
{
free( allocated );
}
int
fuzzy_search( char *pattern,
char *text,
int degree )
{
int min = 0x7FFF;
int howclose;
char *begin;
char *end;
degree = (
strlen( pattern ) * ( 100 - degree )
)
/ 100;
App_init( pattern,
text,
degree );
App_next( & begin,
& end,
& howclose );
while( begin )
{
if ( howclose < min )
{
min = howclose;
}
App_next( & begin,
& end,
& howclose );
}
App_End();
if ( min <= degree )
{
return (
(
strlen( pattern ) - degree
)
* 100
)
/ strlen( pattern );
}
else
{
return -1;
}
}