Lexical Scanner

Name

Lexical Scanner -- a general purpose lexical scanner.

Synopsis


#include <glib.h>


struct      GScanner;
GScanner*   g_scanner_new                   (GScannerConfig *config_templ);
struct      GScannerConfig;

void        g_scanner_input_file            (GScanner *scanner,
                                             gint input_fd);
void        g_scanner_sync_file_offset      (GScanner *scanner);
gint        g_scanner_stat_mode             (const gchar *filename);
void        g_scanner_input_text            (GScanner *scanner,
                                             const	gchar *text,
                                             guint text_len);
GTokenType  g_scanner_peek_next_token       (GScanner *scanner);
GTokenType  g_scanner_get_next_token        (GScanner *scanner);

guint       g_scanner_cur_line              (GScanner *scanner);
guint       g_scanner_cur_position          (GScanner *scanner);
GTokenType  g_scanner_cur_token             (GScanner *scanner);
GTokenValue g_scanner_cur_value             (GScanner *scanner);
gboolean    g_scanner_eof                   (GScanner *scanner);

guint       g_scanner_set_scope             (GScanner *scanner,
                                             guint scope_id);
void        g_scanner_scope_add_symbol      (GScanner *scanner,
                                             guint scope_id,
                                             const gchar *symbol,
                                             gpointer value);
void        g_scanner_scope_foreach_symbol  (GScanner *scanner,
                                             guint scope_id,
                                             GHFunc func,
                                             gpointer user_data);
gpointer    g_scanner_scope_lookup_symbol   (GScanner *scanner,
                                             guint scope_id,
                                             const gchar *symbol);
void        g_scanner_scope_remove_symbol   (GScanner *scanner,
                                             guint scope_id,
                                             const gchar *symbol);

void        g_scanner_freeze_symbol_table   (GScanner *scanner);
void        g_scanner_thaw_symbol_table     (GScanner *scanner);
gpointer    g_scanner_lookup_symbol         (GScanner *scanner,
                                             const gchar *symbol);

void        g_scanner_warn                  (GScanner *scanner,
                                             const gchar *format,
                                             ...);
void        g_scanner_error                 (GScanner *scanner,
                                             const gchar *format,
                                             ...);
void        g_scanner_unexp_token           (GScanner *scanner,
                                             GTokenType expected_token,
                                             const gchar *identifier_spec,
                                             const gchar *symbol_spec,
                                             const gchar *symbol_name,
                                             const gchar *message,
                                             gint is_error);
void        (*GScannerMsgFunc)              (GScanner *scanner,
                                             gchar *message,
                                             gint error);

void        g_scanner_destroy               (GScanner *scanner);

enum        GTokenType;
union       GTokenValue;
enum        GErrorType;
#define     G_CSET_a_2_z
#define     G_CSET_A_2_Z
#define     G_CSET_LATINC
#define     G_CSET_LATINS

#define     g_scanner_add_symbol            ( scanner, symbol, value )
#define     g_scanner_remove_symbol         ( scanner, symbol )
#define     g_scanner_foreach_symbol        ( scanner, func, data )

Description

The GScanner and its associated functions provide a general purpose lexical scanner.

FIXME: really needs an example and more detail, but I don't completely understand it myself. Look at gtkrc.c for some code using the scanner.

Details

struct GScanner

struct GScanner
{
  /* unused fields */
  gpointer		user_data;
  guint			max_parse_errors;
  
  /* g_scanner_error() increments this field */
  guint			parse_errors;
  
  /* name of input stream, featured by the default message handler */
  const gchar		*input_name;
  
  /* data pointer for derived structures */
  gpointer		derived_data;
  
  /* link into the scanner configuration */
  GScannerConfig	*config;
  
  /* fields filled in after g_scanner_get_next_token() */
  GTokenType		token;
  GTokenValue		value;
  guint			line;
  guint			position;
  
  /* fields filled in after g_scanner_peek_next_token() */
  GTokenType		next_token;
  GTokenValue		next_value;
  guint			next_line;
  guint			next_position;
  
  /* to be considered private */
  GHashTable		*symbol_table;
  gint			input_fd;
  const gchar		*text;
  const gchar		*text_end;
  gchar			*buffer;
  guint			scope_id;
  
  /* handler function for _warn and _error */
  GScannerMsgFunc	msg_handler;
};

The data structure representing a lexical scanner.

You should set input_name after creating the scanner, since it is used by the default message handler when displaying warnings and errors. If you are scanning a file, the file name would be a good choice.

The user_data and derived_data fields are not used by the scanner itself. If you need to associate extra data with the scanner you can place it here.

If you want to use your own message handler you can set the msg_handler field. The type of the message handler function is declared by GScannerMsgFunc.


g_scanner_new ()

GScanner*   g_scanner_new                   (GScannerConfig *config_templ);

Creates a new GScanner. The config_templ structure specifies the initial settings of the scanner, which are copied into the GScanner config field. If you pass NULL then the default settings are used. (See g_scanner_config_template in gscanner.c for the defaults.)

config_templ :the initial scanner settings.
Returns :the new GScanner.


struct GScannerConfig

struct GScannerConfig
{
  /* Character sets
   */
  gchar		*cset_skip_characters;		/* default: " \t\n" */
  gchar		*cset_identifier_first;
  gchar		*cset_identifier_nth;
  gchar		*cpair_comment_single;		/* default: "#\n" */
  
  /* Should symbol lookup work case sensitive?
   */
  guint		case_sensitive : 1;
  
  /* Boolean values to be adjusted "on the fly"
   * to configure scanning behaviour.
   */
  guint		skip_comment_multi : 1;		/* C like comment */
  guint		skip_comment_single : 1;	/* single line comment */
  guint		scan_comment_multi : 1;		/* scan multi line comments? */
  guint		scan_identifier : 1;
  guint		scan_identifier_1char : 1;
  guint		scan_identifier_NULL : 1;
  guint		scan_symbols : 1;
  guint		scan_binary : 1;
  guint		scan_octal : 1;
  guint		scan_float : 1;
  guint		scan_hex : 1;			/* `0x0ff0' */
  guint		scan_hex_dollar : 1;		/* `$0ff0' */
  guint		scan_string_sq : 1;		/* string: 'anything' */
  guint		scan_string_dq : 1;		/* string: "\\-escapes!\n" */
  guint		numbers_2_int : 1;		/* bin, octal, hex => int */
  guint		int_2_float : 1;		/* int => G_TOKEN_FLOAT? */
  guint		identifier_2_string : 1;
  guint		char_2_token : 1;		/* return G_TOKEN_CHAR? */
  guint		symbol_2_token : 1;
  guint		scope_0_fallback : 1;		/* try scope 0 on lookups? */
};

Specifies the GScanner settings.

cset_skip_characters specifies which characters should be skipped by the scanner (the default is the whitespace characters: space, tab, carriage-return and line-feed).

cset_identifier_first specifies the characters which can start identifiers. (The default is G_CSET_a_2_z, "_", and G_CSET_A_2_Z.)

cset_identifier_nth specifies the characters which can be used in identifiers, after the first character. The default is G_CSET_a_2_z, "_0123456789", G_CSET_A_2_Z, G_CSET_LATINS, G_CSET_LATINC.

cpair_comment_single specifies the characters at the start and end of single-line comments. The default is "#\n" which means that single-line comments start with a '#' and continue until a '\n' (end of line).

case_sensitive specifies if symbols are case sensitive.

The rest of the fields are flags which turn features on or off. FIXME: should describe these.


g_scanner_input_file ()

void        g_scanner_input_file            (GScanner *scanner,
                                             gint input_fd);

Prepares to scan a file.

scanner :a GScanner.
input_fd :a file descriptor.


g_scanner_sync_file_offset ()

void        g_scanner_sync_file_offset      (GScanner *scanner);

scanner :a GScanner.


g_scanner_stat_mode ()

gint        g_scanner_stat_mode             (const gchar *filename);

Gets the file attributes. This is the st_mode field from the stat structure. See the stat() documentation.

filename :the file name.
Returns :the file attributes.


g_scanner_input_text ()

void        g_scanner_input_text            (GScanner *scanner,
                                             const	gchar *text,
                                             guint text_len);

Prepares to scan a text buffer.

scanner :a GScanner.
text :the text buffer to scan.
text_len :the length of the text buffer.


g_scanner_peek_next_token ()

GTokenType  g_scanner_peek_next_token       (GScanner *scanner);

Gets the next token, without removing it from the input stream. The token data is placed in the next_token, next_value, next_line, and next_position fields of the GScanner structure.

scanner :a GScanner.
Returns :the type of the token.


g_scanner_get_next_token ()

GTokenType  g_scanner_get_next_token        (GScanner *scanner);

Gets the next token, removing it from the input stream. The token data is placed in the token, value, line, and position fields of the GScanner structure.

scanner :a GScanner.
Returns :the type of the token.


g_scanner_cur_line ()

guint       g_scanner_cur_line              (GScanner *scanner);

Gets the current line in the input stream (counting from 1).

scanner :a GScanner.
Returns :the current line.


g_scanner_cur_position ()

guint       g_scanner_cur_position          (GScanner *scanner);

Gets the current position in the current line (counting from 0).

scanner :a GScanner.
Returns :the current position on the line.


g_scanner_cur_token ()

GTokenType  g_scanner_cur_token             (GScanner *scanner);

Gets the current token type. This is simply the token field in the GScanner structure.

scanner :a GScanner.
Returns :the current token type.


g_scanner_cur_value ()

GTokenValue g_scanner_cur_value             (GScanner *scanner);

Gets the current token value. This is simply the value field in the GScanner structure.

scanner :a GScanner.
Returns :the current token value.


g_scanner_eof ()

gboolean    g_scanner_eof                   (GScanner *scanner);

Returns TRUE if the scanner has reached the end of the file or text buffer.

scanner :a GScanner.
Returns :TRUE if the scanner has reached the end of the file or text buffer.


g_scanner_set_scope ()

guint       g_scanner_set_scope             (GScanner *scanner,
                                             guint scope_id);

Sets the current scope.

scanner :a GScanner.
scope_id :the new scope id.
Returns :the old scope id.


g_scanner_scope_add_symbol ()

void        g_scanner_scope_add_symbol      (GScanner *scanner,
                                             guint scope_id,
                                             const gchar *symbol,
                                             gpointer value);

Adds a symbol to the given scope.

scanner :a GScanner.
scope_id :the scope id.
symbol :the symbol to add.
value :the value of the symbol.


g_scanner_scope_foreach_symbol ()

void        g_scanner_scope_foreach_symbol  (GScanner *scanner,
                                             guint scope_id,
                                             GHFunc func,
                                             gpointer user_data);

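Calls a function for each symbol in the given scope.

scanner :a GScanner.
scope_id :the scope id.
func :the function to call with each symbol.
user_data :data to pass to the function.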


g_scanner_scope_lookup_symbol ()

gpointer    g_scanner_scope_lookup_symbol   (GScanner *scanner,
                                             guint scope_id,
                                             const gchar *symbol);

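Looks up a symbol in the given scope.

scanner :a GScanner.
scope_id :the scope id.
symbol :the symbol to look up.
Returns :the value of the symbol in the given scope, or NULL if the symbol is not bound in that scope.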


g_scanner_scope_remove_symbol ()

void        g_scanner_scope_remove_symbol   (GScanner *scanner,
                                             guint scope_id,
                                             const gchar *symbol);

Removes a symbol from the given scope.

scanner :a GScanner.
scope_id :the scope id.
symbol :the symbol to remove.


g_scanner_freeze_symbol_table ()

void        g_scanner_freeze_symbol_table   (GScanner *scanner);

scanner :a GScanner.


g_scanner_thaw_symbol_table ()

void        g_scanner_thaw_symbol_table     (GScanner *scanner);

scanner :a GScanner.


g_scanner_lookup_symbol ()

gpointer    g_scanner_lookup_symbol         (GScanner *scanner,
                                             const gchar *symbol);

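Looks up a symbol in the current scope.

scanner :a GScanner.
symbol :the symbol to look up.
Returns :the value of the symbol in the current scope, or NULL if the symbol is not bound in the current scope.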


g_scanner_warn ()

void        g_scanner_warn                  (GScanner *scanner,
                                             const gchar *format,
                                             ...);

Outputs a warning message, via the GScanner message handler.

scanner :a GScanner.
format :the message format. See the printf() documentation.
... :the parameters to insert into the format string.


g_scanner_error ()

void        g_scanner_error                 (GScanner *scanner,
                                             const gchar *format,
                                             ...);

Outputs an error message, via the GScanner message handler.

scanner :a GScanner.
format :the message format. See the printf() documentation.
... :the parameters to insert into the format string.


g_scanner_unexp_token ()

void        g_scanner_unexp_token           (GScanner *scanner,
                                             GTokenType expected_token,
                                             const gchar *identifier_spec,
                                             const gchar *symbol_spec,
                                             const gchar *symbol_name,
                                             const gchar *message,
                                             gint is_error);

Outputs a message resulting from an unexpected token in the input stream. FIXME: I don't understand the arguments here.

scanner :a GScanner.
expected_token :the expected token.
identifier_spec :a string describing the expected type of identifier, or NULL to use the default "identifier" string.
symbol_spec :a string describing the expected type of symbol, or NULL to use the default "symbol" string.
symbol_name : 
message :a message string to output at the end of the warning/error, or NULL.
is_error :if TRUE it is output as an error. If FALSE it is output as a warning.


GScannerMsgFunc ()

void        (*GScannerMsgFunc)              (GScanner *scanner,
                                             gchar *message,
                                             gint error);

scanner :a GScanner.
message :the message.
error :TRUE if the message signals an error, FALSE if it signals a warning.


g_scanner_destroy ()

void        g_scanner_destroy               (GScanner *scanner);

Frees all memory used by the GScanner.

scanner :a GScanner.


enum GTokenType

typedef enum
{
  G_TOKEN_EOF			=   0,
  
  G_TOKEN_LEFT_PAREN		= '(',
  G_TOKEN_RIGHT_PAREN		= ')',
  G_TOKEN_LEFT_CURLY		= '{',
  G_TOKEN_RIGHT_CURLY		= '}',
  G_TOKEN_LEFT_BRACE		= '[',
  G_TOKEN_RIGHT_BRACE		= ']',
  G_TOKEN_EQUAL_SIGN		= '=',
  G_TOKEN_COMMA			= ',',
  
  G_TOKEN_NONE			= 256,
  
  G_TOKEN_ERROR,
  
  G_TOKEN_CHAR,
  G_TOKEN_BINARY,
  G_TOKEN_OCTAL,
  G_TOKEN_INT,
  G_TOKEN_HEX,
  G_TOKEN_FLOAT,
  G_TOKEN_STRING,
  
  G_TOKEN_SYMBOL,
  G_TOKEN_IDENTIFIER,
  G_TOKEN_IDENTIFIER_NULL,
  
  G_TOKEN_COMMENT_SINGLE,
  G_TOKEN_COMMENT_MULTI,
  G_TOKEN_LAST
} GTokenType;

The possible types of token returned from each g_scanner_get_next_token() call.


union GTokenValue

union GTokenValue
{
  gpointer	v_symbol;
  gchar		*v_identifier;
  gulong	v_binary;
  gulong	v_octal;
  gulong	v_int;
  gdouble	v_float;
  gulong	v_hex;
  gchar		*v_string;
  gchar		*v_comment;
  guchar	v_char;
  guint		v_error;
};

A union holding the value of the token.


enum GErrorType

typedef enum
{
  G_ERR_UNKNOWN,
  G_ERR_UNEXP_EOF,
  G_ERR_UNEXP_EOF_IN_STRING,
  G_ERR_UNEXP_EOF_IN_COMMENT,
  G_ERR_NON_DIGIT_IN_CONST,
  G_ERR_DIGIT_RADIX,
  G_ERR_FLOAT_RADIX,
  G_ERR_FLOAT_MALFORMED
} GErrorType;

The possible errors, used in the v_error field of GTokenValue, when the token is a G_TOKEN_ERROR.


G_CSET_a_2_z

#define G_CSET_a_2_z	"abcdefghijklmnopqrstuvwxyz"

The set of lower-case ASCII alphabet characters. Used for specifying valid identifier characters in GScannerConfig.


G_CSET_A_2_Z

#define G_CSET_A_2_Z	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"

The set of upper-case ASCII alphabet characters. Used for specifying valid identifier characters in GScannerConfig.


G_CSET_LATINC

#define     G_CSET_LATINC

The set of upper-case (capital) letters among the extended characters of the ISO 8859-1 (Latin-1) character set, i.e. those which are not ASCII characters. Used for specifying valid identifier characters in GScannerConfig.


G_CSET_LATINS

#define     G_CSET_LATINS

The set of lower-case (small) letters among the extended characters of the ISO 8859-1 (Latin-1) character set, i.e. those which are not ASCII characters. Used for specifying valid identifier characters in GScannerConfig.


g_scanner_add_symbol()

#define     g_scanner_add_symbol( scanner, symbol, value )

Adds a symbol to the default scope. Deprecated in favour of g_scanner_scope_add_symbol().

scanner :a GScanner.
symbol :the symbol to add.
value :the value of the symbol.


g_scanner_remove_symbol()

#define     g_scanner_remove_symbol( scanner, symbol )

Removes a symbol from the default scope. Deprecated in favour of g_scanner_scope_remove_symbol().

scanner :a GScanner.
symbol :the symbol to remove.


g_scanner_foreach_symbol()

#define     g_scanner_foreach_symbol( scanner, func, data )

Calls a function for each symbol in the default scope. Deprecated in favour of g_scanner_scope_foreach_symbol().

scanner :a GScanner.
func :the function to call with each symbol.
data :data to pass to the function.