/* --- Copyright University of Sussex 1993. All rights reserved. ----------
 > File:            C.all/lib/lr_parser/lib/lr_example_2.p
 > Purpose:         LALR(1) Parser Generator: example (2)
 > Author:          Robert John Duncan, Nov 27 1992 (see revisions)
 > Documentation:	HELP * DEFINE_PARSER
 > Related Files:	LIB * DEFINE_PARSER, * LR_EXAMPLE_1
 */

compile_mode:pop11 +strict;

/*
 *  A parser for ANSI C, based on the grammar in K&R (2nd edition).
 *
 *  This merely recognises valid C programs; it doesn't do anything with
 *  them. The minimal environment handling is necessary to disambiguate
 *  between typedef names and other identifiers: it's incomplete in so
 *  far as it doesn't respect scoping (all typedef names are global).
 *
 *  The grammar appears quite liberal because it does no semantic
 *  checking, allowing many obviously silly programs to pass, e.g:
 *
 *  	extern static typedef foo = "Hello world"++;
 *
 *  No attempt has been made to report syntax errors meaningfully.
 *
 *  To run the parser, it has to be applied to a suitable lexical
 *  analyser procedure: a possible definition is given in the second
 *  half of this file. You should load the whole file, and then try:
 *
 *  	ANSI_C(c_tokeniser(charin));
 *
 *  The tokeniser simply strips all pre-processor directives, so real
 *  programs should be run through CPP first (e.g. using cc -E).
 *
 *	The parser could be made more efficient: using the _________raw_input option
 *	and changing the lexical analyser accordingly would be one
 *	improvement; another would be to use operator precedences in place
 *	of the long hierarchy of expression types.
 */

section;

uses lr_parser;

define :parser global ANSI_C(procedure input);

options
	conflicts = 1/0
		;;; number of conflicts generated by the grammar:
		;;; 1 S/R conflict from the "dangling-else" ambiguity
	input,
		;;; the parser reads its input from the procedure argument
	results = 0
		;;; default number of results returned by rules
endoptions

definitions

	dlocal popprompt = 'C: ';
		;;; for interactive testing

	define lconstant idclass =
		;;; table of special identifiers: typedef names and enum constants
		newanyproperty([], 64, 1, 56, false, false, "perm", false, false);
	enddefine;
	;;;
	clearproperty(idclass);	;;; clear it on each call

	lvars typedefs = true;
		;;; enables recognition of typedef names:
		;;; setting this <false> at crucial points allows typedef names to
		;;; be read as ordinary identifiers so that they can be rebound

	define lconstant is_typedef_name(id);
		lvars id;
		typedefs and (idclass(id) == "typedef");
	enddefine;

	define lconstant is_enumeration_constant(id);
		lvars id;
		idclass(id) == "enum";
	enddefine;

enddefinitions

tokens

	/* Reserved words */
	'auto' 'break' 'case' 'char' 'const' 'continue' 'default' 'do'
	'double' 'else' 'enum' 'extern' 'float' 'for' 'goto' 'if' 'int'
	'long' 'register' 'return' 'short' 'signed' 'sizeof' 'static'
	'struct' 'switch' 'typedef' 'union' 'unsigned' 'void' 'volatile'
	'while'

	/* Operators, separators etc. */
	'!' '!=' '%' '%=' '&&' '&' '&=' '(' ')' '*' '*=' '+' '++' '+='
	',' '-' '--' '-=' '->' '.' '...' '/' '/=' ':' ';' '<' '<<' '<<='
	'<=' '=' '==' '>' '>=' '>>' '>>=' '?' '[' ']' '^' '^=' '{' '|'
	'|=' '||' '}' '~'

	/* Special tokens */
	TYPEDEF_NAME			: is_typedef_name;
	ENUMERATION_CONSTANT	: is_enumeration_constant;
	IDENTIFIER				: isword;
	INTEGER_CONSTANT		: isintegral;
	CHARACTER_CONSTANT		: isintegral;	/* never seen */
	STRING_CONSTANT			: isstring;
	FLOATING_CONSTANT		: isdecimal;

endtokens

rules

	translation_unit ::=
		external_declaration
	|	translation_unit external_declaration
	;

	external_declaration ::=
		function_definition
	|	declaration
	;

	function_definition ::=
	/*	This causes conflicts
		OPT_declaration_specifiers:1 declarator:1 OPT_declaration_list
			compound_statement
	*/
		declaration_specifiers:1 declarator:1 OPT_declaration_list
			compound_statement
	|	declarator:1 OPT_declaration_list compound_statement
	;

	declaration ::=
		declaration_specifiers:spec OPT_init_declarator_list:ids ';'
			{	if spec == "typedef" then
					;;; declare each identifier as a new typedef name
					lvars id;
					for id in ids do
						spec -> idclass(id);
					endfor;
				endif;
				;;; enable typedef names for the next declaration
				true -> typedefs;
			}
	;

	OPT_declaration_list ::=
		declaration_list
	|	/* empty */
	;

	declaration_list ::=
		declaration
	|	declaration_list declaration
	;

	OPT_declaration_specifiers:1 ::=
		declaration_specifiers:1
	|	/* empty */
			{ false }
	;

	declaration_specifiers:1 ::=
		storage_class_specifier:spec1 OPT_declaration_specifiers:spec2
			{	if spec1 == "typedef" then spec1 else spec2 endif;
			}
	|	type_specifier OPT_declaration_specifiers
	|	type_qualifier OPT_declaration_specifiers
	;

	storage_class_specifier:1 ::=
		'auto'
	|	'register'
	|	'static'
	|	'extern'
	|	'typedef'
	;

	type_specifier ::=
	/*  This rule added to cope with typedef names:
		once we've read a single type_specifier, we disable further
		recognition of typedef names for a possibly following declarator
	*/
		_type_specifier
			{	false -> typedefs;
			}
	;

	_type_specifier ::=
		'void'
	|	'char'
	|	'short'
	|	'int'
	|	'long'
	|	'float'
	|	'double'
	|	'signed'
	|	'unsigned'
	|	struct_or_union_specifier
	|	enum_specifier
	|	TYPEDEF_NAME
	;

	type_qualifier ::=
		'const'
	|	'volatile'
	;

	struct_or_union_specifier ::=
		struct_or_union OPT_identifier '{' struct_declaration_list '}'
	|	struct_or_union identifier
	;

	struct_or_union ::=
		'struct'
	|	'union'
	;

	struct_declaration_list ::=
		struct_declaration
	|	struct_declaration_list struct_declaration
	;

	OPT_init_declarator_list:1 ::=
		init_declarator_list:1
	|	/* empty */
			{ [] }
	;

	init_declarator_list:1 ::=
		init_declarator:id
			{	[^id];
			}
	|	init_declarator_list:ids ','
			{	;;; disable typedef names for the next declarator
				false -> typedefs;
			}
		init_declarator:id
			{	id :: ids;
			}
	;

	init_declarator:1 ::=
		declarator:1
	|	declarator:id '=' initializer
	;

	struct_declaration ::=
		specifier_qualifier_list struct_declarator_list ';'
			{	;;; enable typedef names for the next struct_declaration
				true -> typedefs;
			}
	;

	OPT_specifier_qualifier_list ::=
		specifier_qualifier_list
	|	/* empty */
	;

	specifier_qualifier_list ::=
		type_specifier OPT_specifier_qualifier_list
	|	type_qualifier OPT_specifier_qualifier_list
	;

	struct_declarator_list ::=
		struct_declarator
	|	struct_declarator_list ','
			{	;;; disable typedef names for the next struct_declarator
				false -> typedefs;
			}
		struct_declarator
	;

	struct_declarator ::=
		declarator:1
	|	declarator:1 ':' constant_expression
	|	':'
			{	;;; enable typedef names for the constant expression
				true -> typedefs;
			}
		constant_expression
	;

	enum_specifier ::=
		'enum' OPT_identifier '{' enumerator_list:ids '}'
			{	;;; declare each identifier as a new enumeration constant
				lvars id;
				for id in ids do
					"enum" -> idclass(id);
				endfor;
			}
	|	'enum' identifier
	;

	enumerator_list:1 ::=
		enumerator:id
			{ [^id] }
	|	enumerator_list:ids ',' enumerator:id
			{ id :: ids }
	;

	enumerator:1 ::=
		IDENTIFIER
	|	IDENTIFIER '=' constant_expression
	;

	declarator:1 ::=
		OPT_pointer direct_declarator:1
	;

	direct_declarator:1 ::=
		IDENTIFIER:id
			{	;;; -id- is the name being declared; once we've seen it,
				;;; we enable typedef names for subsequent parameters,
				;;; array bounds, initializers etc.
				true -> typedefs;
				id;
			}
	|	'(' declarator:id ')'
			{ id }
	|	direct_declarator '[' OPT_constant_expression ']'
	|	direct_declarator '(' OPT_parameter_type_list ')'
	|	direct_declarator '(' identifier_list ')'
	;

	OPT_pointer ::=
		pointer
	|	/* empty */
	;

	pointer ::=
		'*' OPT_type_qualifier_list
	|	'*' OPT_type_qualifier_list pointer
	;

	OPT_type_qualifier_list ::=
		type_qualifier_list
	|	/* empty */
	;

	type_qualifier_list ::=
		type_qualifier
	|	type_qualifier_list type_qualifier
	;

	OPT_parameter_type_list ::=
		parameter_type_list
	|	/* empty */
	;

	parameter_type_list ::=
		parameter_list
	|	parameter_list ',' '...'
	;

	parameter_list ::=
		parameter_declaration
	|	parameter_list ',' parameter_declaration
	;

	parameter_declaration ::=
	/*	These make it impossible to handle typedef names correctly
		declaration_specifiers declarator
	|	declaration_specifiers OPT_abstract_declarator
	*/
		declaration_specifiers _parameter_declarator
	;

	_parameter_declarator ::=
	/*	This rule added to cope with typedef names:
		at the start of a parameter_declaration, -typedefs- is always
		<true> to allow for a typedef name occurring in a leading
		type_specifier. Once a type_specifier has been read, -typedefs-
		is set <false> so that subsequent typedef names can be rebound.
		The OPT_pointer symbol at the start of this rule causes the first
		token of the direct_parameter_declarator to be read before the
		action to reset -typedefs- is done: thus a single unbracketed
		typedef name following a type_specifier will be read as an
		IDENTIFIER and hence as the start of a direct declarator. A
		bracketed typedef name, or a typedef name occurring without a
		leading type_specifier is read as a TYPEDEF_NAME.
	*/
		OPT_pointer
			{	true -> typedefs;
			}
		OPT_direct_parameter_declarator
	;

	OPT_direct_parameter_declarator ::=
		/* empty */
	|	direct_declarator
	|	direct_abstract_declarator
	;

	identifier_list ::=
		IDENTIFIER
	|	identifier_list ',' IDENTIFIER
	;

	initializer ::=
		assignment_expression
	|	'{' initializer_list '}'
	|	'{' initializer_list ',' '}'
	;

	initializer_list ::=
		initializer
	|	initializer_list ',' initializer
	;

	type_name ::=
		specifier_qualifier_list
			{	;;; enable typedef names, in case a preceding type_specifier
				;;; turned them off
				true -> typedefs;
			}
		OPT_abstract_declarator
	;

	OPT_abstract_declarator ::=
		abstract_declarator
	|	/* empty */
	;

	abstract_declarator ::=
		pointer
	|	OPT_pointer direct_abstract_declarator
	;

	direct_abstract_declarator ::=
		'(' abstract_declarator ')'
	/*	These cause conflicts
	|	OPT_direct_abstract_declarator '[' OPT_constant_expression ']'
	|	OPT_direct_abstract_declarator '(' OPT_parameter_type_list ')'
	*/
	|	'[' OPT_constant_expression ']'
	|	'(' OPT_parameter_type_list ')'
	|	direct_abstract_declarator '[' OPT_constant_expression ']'
	|	direct_abstract_declarator '(' OPT_parameter_type_list ')'
	;

	statement ::=
		labeled_statement
	|	expression_statement
	|	compound_statement
	|	selection_statement
	|	iteration_statement
	|	jump_statement
	;

	labeled_statement ::=
		IDENTIFIER ':' statement
	|	'case' constant_expression ':' statement
	|	'default' ':' statement
	;

	expression_statement ::=
		OPT_expression ';'
	;

	compound_statement ::=
		'{' OPT_declaration_list OPT_statement_list '}'
	;

	OPT_statement_list ::=
		statement_list
	|	/* empty */
	;

	statement_list ::=
		statement
	|	statement_list statement
	;

	selection_statement ::=
		'if' '(' expression ')' statement
	|	'if' '(' expression ')' statement 'else' statement
	|	'switch' '(' expression ')' statement
	;

	iteration_statement ::=
		'while' '(' expression ')' statement
	|	'do' statement 'while' '(' expression ')' ';'
	|	'for' '(' OPT_expression ';' OPT_expression ';' OPT_expression ')'
			statement
	;

	jump_statement ::=
		'goto' IDENTIFIER ';'
	|	'continue' ';'
	|	'break' ';'
	|	'return' OPT_expression ';'
	;

	OPT_expression ::=
		expression
	|	/* empty */
	;

	expression ::=
		assignment_expression
	|	expression ',' assignment_expression
	;

	assignment_expression ::=
		conditional_expression
	|	unary_expression assignment_operator assignment_expression
	;

	assignment_operator ::=
		'='
	|	'*='
	|	'/='
	|	'%='
	| 	'+='
	|	'-='
	|	'<<='
	|	'>>='
	|	'&='
	|	'^='
	|	'|='
	;

	conditional_expression ::=
		logical_OR_expression
	|	logical_OR_expression '?' expression ':' conditional_expression
	;

	OPT_constant_expression ::=
		constant_expression
	|	/* empty */
	;

	constant_expression ::=
		conditional_expression
	;

	logical_OR_expression ::=
		logical_AND_expression
	|	logical_OR_expression '||' logical_AND_expression
	;

	logical_AND_expression ::=
		inclusive_OR_expression
	|	logical_AND_expression '&&' inclusive_OR_expression
	;

	inclusive_OR_expression ::=
		exclusive_OR_expression
	|	inclusive_OR_expression '|' exclusive_OR_expression
	;

	exclusive_OR_expression ::=
		AND_expression
	|	exclusive_OR_expression '^' AND_expression
	;

	AND_expression ::=
		equality_expression
	|	AND_expression '&' equality_expression
	;

	equality_expression ::=
		relational_expression
	|	equality_expression '==' relational_expression
	|	equality_expression '!=' relational_expression
	;

	relational_expression ::=
		shift_expression
	|	relational_expression '<' shift_expression
	|	relational_expression '>' shift_expression
	|	relational_expression '<=' shift_expression
	|	relational_expression '>=' shift_expression
	;

	shift_expression ::=
		additive_expression
	|	shift_expression '<<' additive_expression
	|	shift_expression '>>' additive_expression
	;

	additive_expression ::=
		multiplicative_expression
	|	additive_expression '+' multiplicative_expression
	|	additive_expression '-' multiplicative_expression
	;

	multiplicative_expression ::=
		cast_expression
	|	multiplicative_expression '*' cast_expression
	|	multiplicative_expression '/' cast_expression
	|	multiplicative_expression '%' cast_expression
	;

	cast_expression ::=
		unary_expression
	|	'(' type_name ')' cast_expression
	;

	unary_expression ::=
		postfix_expression
	|	'++' unary_expression
	|	'--' unary_expression
	|	unary_operator cast_expression
	|	'sizeof' unary_expression
	|	'sizeof' '(' type_name ')'
	;

	unary_operator ::=
		'&'
	|	'*'
	|	'+'
	|	'-'
	|	'~'
	|	'!'
	;

	postfix_expression ::=
		primary_expression
	|	postfix_expression '[' expression ']'
	|	postfix_expression '(' OPT_argument_expression_list ')'
	|	postfix_expression '.' identifier
	|	postfix_expression '->' identifier
	|	postfix_expression '++'
	|	postfix_expression '--'
	;

	primary_expression ::=
		IDENTIFIER
	|	constant
	|	string
	|	'(' expression ')'
	;

	OPT_argument_expression_list ::=
		argument_expression_list
	|	/* empty */
	;

	argument_expression_list ::=
		assignment_expression
	|	argument_expression_list ',' assignment_expression
	;

	constant ::=
		INTEGER_CONSTANT
	|	CHARACTER_CONSTANT
	|	FLOATING_CONSTANT
	|	ENUMERATION_CONSTANT
	;

	string ::=
		STRING_CONSTANT
	|	string STRING_CONSTANT
	;

	OPT_identifier ::=
		identifier
	|	/* empty */
	;

	identifier ::=
		IDENTIFIER
	|	TYPEDEF_NAME
	;

endrules

enddefine;


;;; =======================================================================
;;; A sample tokeniser for C: this does the absolute minimum, but
;;; provides suitable test input for the -ANSI_C- procedure

/*
 *	Character types
 */

lconstant macro (
	ALPHA	= 1,
	DIGIT	= 2,
	SQUOTE	= 3,
	DQUOTE	= 4,
	DOT		= 5,
	SHIFT	= 6,
	ERR		= 7,
);

lconstant
	chartype = {%

	;;;	000		001		002		003		004		005		006		007
		ERR,	ERR,	ERR,	ERR,	ERR,	ERR,	ERR,	ERR,

	;;;	BS		TAB		NL		VT		FF		CR		016		017
		space,	space,	space,	space,	space,	space,	ERR,	ERR,

	;;;	020		021		022		023		024		025		026		027
		ERR,	ERR,	ERR,	ERR,	ERR,	ERR,	ERR,	ERR,

	;;;	030		031		032		033		034		035		036		037
		ERR,	ERR,	ERR,	ERR,	ERR,	ERR,	ERR,	ERR,

	;;;	SP		!		"		#		$		%		&		'
		space,	'=',	DQUOTE,	ERR,	ERR,	'=',	'&=',	SQUOTE,

	;;; (		)		*		+		,		-		.		/
		"(",	")",	'=',	'+=',	",",	'-=>',	DOT,	'=',

	;;;	0		1		2		3		4		5		6		7
		DIGIT,	DIGIT,	DIGIT,	DIGIT,	DIGIT,	DIGIT,	DIGIT,	DIGIT,

	;;;	8		9		:		;		<		=		>		?
		DIGIT,	DIGIT,	":",	";",	SHIFT,	'=',	SHIFT,	"?",

	;;;	@		A		B		C		D		E		F		G
		ERR,	ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,

	;;;	H		I		J		K		L		M		N		O
		ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,

	;;;	P		Q		R		S		T		U		V		W
		ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,

	;;;	X		Y		Z		[		\		]		^		_
		ALPHA,	ALPHA,	ALPHA,	"[",	ERR,	"]",	'=',	ALPHA,

	;;;	`		a		b		c		d		e		f		g
		ERR,	ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,

	;;;	h		i		j		k		l		m		n		o
		ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,

	;;;	p		q		r		s		t		u		v		w
		ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,	ALPHA,

	;;;	x		y		z		{		|		}		~		DEL
		ALPHA,	ALPHA,	ALPHA,	"{",	'=|',	"}",	"~",	space,

%};

/*
 *	Character input
 */

lvars
	input,
		;;; the current input: a pair consisting of a 1 character
		;;; put-back buffer and a character repeater
;

define lconstant getc() -> c;
	lvars c;
	if fast_front(input) ->> c then
		;;; buffered character: consume it, unless it's <termin>
		unless c == termin then false -> fast_front(input) endunless;
	else
		fast_apply(fast_back(input)) -> c;
		if isinteger(c) and c fi_>= 0 then
			if c fi_> 127 then chain(input, getc) endif;
		elseif c == termin then
			termin -> fast_front(input);
		else
			mishap(c, 1, 'CHARACTER CODE NEEDED');
		endif;
	endif;
enddefine;
;;;
define updaterof getc(c);
	lvars c;
	c -> fast_front(input);
enddefine;

define lconstant peekc() -> c;
	lvars c;
	getc() ->> c -> getc();
enddefine;

/*
 *	Token types
 */

define lconstant Error(c);
	lvars c;
	if c > `\s` and c < `\^?` then consstring(c, 1) -> c endif;
	mishap(c, 1, 'ILLEGAL INPUT CHARACTER');
enddefine;

define lconstant Integer(c, base, digits) -> n;
	lvars c, base, digits, d, n = 0;
	while c /== termin and strmember(uppertolower(c), digits) ->> d do
		n * base + (d fi_- 1) -> n;
		getc() -> c;
	endwhile;
	c -> getc();
enddefine;

define lconstant OctInt =
	Integer(% 8, '01234567' %);
enddefine;

define lconstant DecInt =
	Integer(% 10, '0123456789' %);
enddefine;

define lconstant HexInt =
	Integer(% 16, '0123456789abcdef' %);
enddefine;

define lconstant Float(n) -> n;
	lvars	c, d, power = 0, n;
	dlocal	popdprecision = true;
	getc() -> c;
	while c /== termin and (strmember(c, '0123456789') ->> d) do
		n * 10 + (d fi_- 1) -> n;
		power fi_- 1 -> power;
		getc() -> c;
	endwhile;
	if c == `e` or c == `E` then
		getc() -> c;
		if c == `-` then
			power - DecInt(getc())
		elseif c == `+` then
			power + DecInt(getc())
		else
			power + DecInt(c)
		endif -> power;
		getc() -> c;
	endif;
	unless strmember(c, 'lLfF') then c -> getc() endunless;
	n * (10.0 ** power) -> n;
enddefine;

define lconstant Number(c) -> n;
	lvars c, n, float = false;
	if c == `0` then
		getc() -> c;
		if c == `.` then
			Float(0) ->> n -> float;
		elseif c == `x` or c == `X` then
			HexInt(getc()) -> n;
		else
			OctInt(c) -> n;
		endif;
	else
		DecInt(c) -> n;
		if peekc() == `.` then
			getc() -> ;
			Float(n) ->> n -> float;
		endif;
	endif;
	if not(float) and strmember(peekc(), 'lLuU') then
		getc() -> ;
	endif;
enddefine;

define lconstant Ident(c);
	lvars c, type;
	consword(#|
		repeat
			c, getc() -> c;
			quitif(c == termin);
			fast_subscrv(c fi_+ 1, chartype) -> type;
			quitunless(type == ALPHA or type == DIGIT);
		endrepeat;
		c -> getc();
	|#);
enddefine;

define lconstant QuotedChars(quote);
	lvars c, c1, quote;

	define lconstant esc =
		newassoc([
			[`a		`\^G]
			[`b		`\^H]
			[`t		`\^I]
			[`n		`\^J]
			[`v		`\^K]
			[`f		`\^L]
			[`r		`\^M]
			[`\\	`\\]
			[`?		`?]
			[`'		`']
			[`"		`"]
		]);
	enddefine;

	(#|	until (getc() ->> c) == quote do
			if c == `\\` then
				getc() -> c;
				if esc(c) ->> c1 then
					c1;
				elseif c == `x` then
					HexInt(getc());
				else
					OctInt(c);
				endif;
			elseif c == `\n` or c == termin then
				mishap(0, 'UNTERMINATED STRING');
			else
				c;
			endif;
		enduntil;
	|#);
enddefine;

define lconstant Char(c);
	lvars c, n = QuotedChars(c);
	if n == 0 then
		mishap(0, 'EMPTY CHARACTER CONSTANT');
	endif;
	erasenum(n-1);
enddefine;

define lconstant String(c);
	lvars c;
	consstring(QuotedChars(c));
enddefine;

define lconstant Dot(c);
	lvars c;
	if (peekc() ->> c) == `.` then
		(getc(), getc()) -> (, c);
		unless c == `.` then
			Error(c);
		endunless;
		"'...'";
	elseif c /== termin and fast_subscrv(c fi_+ 1, chartype) == DIGIT then
		Float(0);
	else
		".";
	endif;
enddefine;

define lconstant Shift(c);	;;; < >
	lvars c;
	consword(#|
		c;
		if peekc() == c then getc() endif;
		if peekc() == `=` then getc() endif;
	|#);
enddefine;

lconstant lextable = {
	^Ident		;;; ALPHA
	^Number		;;; DIGIT
	^Char		;;; SQUOTE
	^String		;;; DQUOTE
	^Dot		;;; DOT
	^Shift		;;; SHIFT
	^Error		;;; ERR
};

/*
 *	The tokeniser
 */

define lconstant gettoken(input);
	lvars	c, token;
	dlocal	input;
	repeat
		getc() -> c;
		if c == termin then
			return(termin);
		elseif (fast_subscrv(c fi_+ 1, chartype) ->> token) == space then
			;;; ignore
		elseif c == `/` and peekc() == `*` then
			;;; comment
			getc() -> ;
			until (getc() ->> c) == `*` and peekc() == `/` do
				if c == termin then mishap(0, 'UNTERMINATED COMMENT') endif;
			enduntil;
			getc() -> ;
		elseif c == `#` then
			;;; preprocessor directive: treat as end-of-line comment!!
			until (getc() ->> c) == `\n` or c == termin do enduntil;
			c -> getc();
		else
			if isinteger(token) then
				fast_apply(c, fast_subscrv(token, lextable)) -> token;
			elseif isstring(token) then
				if strmember(peekc(), token) then
					consword(c, getc(), 2)
				else
					consword(c, 1)
				endif -> token;
			endif;
			return(token);
		endif;
	endrepeat;
enddefine;

;;; c_tokeniser:
;;;		returns a C tokeniser for a named file, input device or character
;;;		repeater

define global c_tokeniser(input);
	lvars input;
	unless isprocedure(input) then
		discin(input) -> input;
	endunless;
	gettoken(% writeable conspair(false, input) %);
enddefine;

endsection;


/* --- Revision History ---------------------------------------------------
--- Robert John Duncan, Jul  1 1993
		Added missing uses lr_parser
 */
