#include <string.h>
#include <stdio.h>
#include <glib.h>
#include "debug.h"
#include "linguistics.h"

Functions
char *	linguistics_casefold (const char *in)

static char **	linguistics_get_special (const char *str, const char *end)

int	linguistics_compare (const char *s1, const char *s2, enum linguistics_cmp_mode mode)
	Compare two strings, trying to replace special characters (e.g. umlauts) in first string with plain letters.

char *	linguistics_expand_special (const char *str, int mode)
	Replace special characters in string (e.g. umlauts) with plain letters. This is useful e.g. to canonicalize a string for comparison.

char *	linguistics_next_word (char *str)

int	linguistics_search (const char *str)

static char *	linguistics_dup_utf8_char (const char *s)
	Copy one utf8 encoded char to newly allocated buffer.

void	linguistics_init (void)

void	linguistics_free (void)

Variables
static const char *	special [][3]

static const char *	upperlower []

static GHashTable *	casefold_hash

static GHashTable *	special_hash

Function Documentation

◆ linguistics_casefold()

char * linguistics_casefold ( const char * in )

References casefold_hash, dbg, and lvl_error.

Referenced by binmap_search_new(), country_search_new(), gui_internal_get_match_quality(), item_bin_sort_compare(), linguistics_compare(), navigation_cmd_get_exit_announce(), and removecase().

◆ linguistics_compare()

int linguistics_compare	(	const char *	s1,
		const char *	s2,
		enum linguistics_cmp_mode	mode
	)

Compare two strings, trying to replace special characters (e.g. umlauts) in first string with plain letters.

Parameters

s1	First string to process, for example, an item name from the map. Will be linguistics_casefold()ed before comparison.
s2	Second string to process, usually user supplied search string. Should be linguistics_casefold()ed before calling this function.
mode	Set to a composition of linguistics_cmp_mode flags to have s1 linguistics_expand_special()ed, allow matches shorter than the whole s1, or let matches start from any word boundary within s1.

Returns: 0 when strings are equal

References linguistics_casefold(), linguistics_cmp_expand, linguistics_cmp_partial, linguistics_cmp_words, linguistics_expand_special(), linguistics_next_word(), and s1.

Referenced by binmap_search_get_item(), map_parse_country_binfile(), and match().

◆ linguistics_dup_utf8_char()

static char * linguistics_dup_utf8_char ( const char * s )

static

Copy one utf8 encoded char to newly allocated buffer.

Parameters

s	pointer to the beginning of the char.

Returns: newly allocated nul-terminated string containing one utf8 encoded character.

References next.

Referenced by linguistics_init().

◆ linguistics_expand_special()

char * linguistics_expand_special	(	const char *	str,
		int	mode
	)

Replace special characters in string (e.g. umlauts) with plain letters. This is useful e.g. to canonicalize a string for comparison.

Parameters

str	string to process
mode	Replacement mode. 0=do nothing, 1=replace with single UTF character, 2=replace with multiple letters if the commonly used replacement has multiple letters (e.g. a-umlaut -> ae)

Returns: copy of string, with characters replaced

References dbg, linguistics_get_special(), lvl_debug, match(), and next.

Referenced by gui_internal_cmd_pois_item_selected(), gui_internal_find_next_possible_key(), gui_internal_get_match_quality(), item_bin_write_match(), and linguistics_compare().

◆ linguistics_free()

void linguistics_free ( void )

References casefold_hash, and special_hash.

Referenced by main_real().

◆ linguistics_get_special()

static char ** linguistics_get_special	(	const char *	str,
		const char *	end
	)

static

References special_hash.

Referenced by linguistics_expand_special().

◆ linguistics_init()

void linguistics_init ( void )

References casefold_hash, linguistics_dup_utf8_char(), s1, special, special_hash, and upperlower.

Referenced by main(), and main_real().

◆ linguistics_next_word()

char * linguistics_next_word ( char * str )

References LINGUISTICS_WORD_SEPARATORS_ASCII.

Referenced by item_bin_write_match(), and linguistics_compare().

◆ linguistics_search()

int linguistics_search ( const char * str )

Referenced by item_bin_write_match().

Variable Documentation

◆ casefold_hash

GHashTable* casefold_hash

static

Referenced by linguistics_casefold(), linguistics_free(), and linguistics_init().

◆ special

const char* special[][3]

static

Referenced by linguistics_init().

◆ special_hash

GHashTable * special_hash

static

Referenced by linguistics_free(), linguistics_get_special(), and linguistics_init().

◆ upperlower

const char* upperlower[]

static

Initial value:

= {
    
    "ÄËÏÖÜŸŐŰÁĆÉÍĹŃÓŔŚÚÝŹĄĘĮŲĊĖĠİĿŻĐĦŁŦÅŮČĎĚĽŇŘŠŤŽØĀĒĪŌŪĂĔĞĬŎŬÂĈÊĜĤÎĴÔŜÛŴŶÇĢĶĻŅŖŞŢÃĨÑÕŨÀÈÌÒÙÆĲŒÐŊÞ",
    "äëïöüÿőűáćéíĺńóŕśúýźąęįųċėġıŀżđħłŧåůčďěľňřšťžøāēīōūăĕğĭŏŭâĉêĝĥîĵôŝûŵŷçģķļņŗşţãĩñõũàèìòùæĳœðŋþ",
    
    "АБВГҐЃДЂЕЄЁЖЗИЙКЌЛЉМНЊОПРСТУФХЦЏЧШЩЪЫЬЭЮЯІЇЎ",
    "абвгґѓдђеєёжзийкќлљмнњопрстуфхцџчшщъыьэюяіїў",
 
    NULL
}

Referenced by linguistics_init().

Functions

Variables

Function Documentation

◆ linguistics_casefold()

◆ linguistics_compare()

◆ linguistics_dup_utf8_char()

◆ linguistics_expand_special()

◆ linguistics_free()

◆ linguistics_get_special()

◆ linguistics_init()

◆ linguistics_next_word()

◆ linguistics_search()

Variable Documentation

◆ casefold_hash

◆ special

◆ special_hash

◆ upperlower