View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2012-2021, University of Amsterdam
    7                              VU University Amsterdam
    8			      SWI-Prolog Solutions b.v.
    9    All rights reserved.
   10
   11    Redistribution and use in source and binary forms, with or without
   12    modification, are permitted provided that the following conditions
   13    are met:
   14
   15    1. Redistributions of source code must retain the above copyright
   16       notice, this list of conditions and the following disclaimer.
   17
   18    2. Redistributions in binary form must reproduce the above copyright
   19       notice, this list of conditions and the following disclaimer in
   20       the documentation and/or other materials provided with the
   21       distribution.
   22
   23    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   24    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   25    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   26    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   27    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   28    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   29    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   30    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   31    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   33    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   34    POSSIBILITY OF SUCH DAMAGE.
   35*/
   36
   37:- module(dcg_basics,
   38	  [ white//0,			% <white inside line>
   39	    whites//0,			% <white inside line>*
   40	    blank//0,			% <blank>
   41	    blanks//0,			% <blank>*
   42	    nonblank//1,		% <nonblank>
   43	    nonblanks//1,		% <nonblank>* --> chars		(long)
   44	    blanks_to_nl//0,		% [space,tab,ret]*nl
   45	    string//1,			% <any>* -->chars		(short)
   46	    string_without//2,		% Exclude, -->chars		(long)
   47					% Characters
   48	    alpha_to_lower//1,		% Get lower|upper, return lower
   49					% Decimal numbers
   50	    digits//1,			% [0-9]* -->chars
   51	    digit//1,			% [0-9] --> char
   52	    integer//1,			% [+-][0-9]+ --> integer
   53	    float//1,			% [+-]?[0-9]+(.[0-9]*)?(e[+-]?[0-9]+)? --> float
   54	    number//1,			% integer | float
   55					% Hexadecimal numbers
   56	    xdigits//1,			% [0-9A-Fa-f]* --> 0-15*
   57	    xdigit//1,			% [0-9A-Fa-f] --> 0-15
   58	    xinteger//1,		% [0-9A-Fa-f]+ --> integer
   59
   60	    prolog_var_name//1,		% Read a Prolog variable name
   61
   62	    eol//0,			% End of line
   63	    eos//0,			% Test end of input.
   64	    remainder//1,		% -List
   65
   66					% generation (TBD)
   67	    atom//1			% generate atom
   68	  ]).   69:- use_module(library(lists)).   70:- use_module(library(error)).   71
   72
   73/** <module> Various general DCG utilities
   74
   75This library provides various commonly  used   DCG  primitives acting on
   76list  of  character  *codes*.  Character   classification  is  based  on
   77code_type/2.
   78
   79This module started its life as  library(http/dcg_basics) to support the
   80HTTP protocol. Since then, it was increasingly  used in code that has no
   81relation to HTTP and therefore  this  library   was  moved  to  the core
   82library.
   83
   84@tbd	This is just a starting point. We need a comprehensive set of
   85	generally useful DCG primitives.
   86*/
   87
   88%%	string_without(+EndCodes, -Codes)// is det.
   89%
   90%	Take as many codes from the input  until the next character code
   91%	appears in the list EndCodes.  The   terminating  code itself is
   92%	left on the input.  Typical  use  is   to  read  upto  a defined
   93%	delimiter such as a newline  or   other  reserved character. For
   94%	example:
   95%
   96%	    ==
   97%	        ...,
   98%	        string_without("\n", RestOfLine)
   99%	    ==
  100%
  101%	@arg EndCodes is a list of character codes.
  102%	@see string//1.
  103
  104string_without(End, Codes) -->
  105	{ string(End), !,
  106	  string_codes(End, EndCodes)
  107	},
  108	list_string_without(EndCodes, Codes).
  109string_without(End, Codes) -->
  110	list_string_without(End, Codes).
  111
  112list_string_without(Not, [C|T]) -->
  113	[C],
  114	{ \+ memberchk(C, Not)
  115	}, !,
  116	list_string_without(Not, T).
  117list_string_without(_, []) -->
  118	[].
  119
  120%%	string(-Codes)// is nondet.
  121%
  122%	Take as few as possible tokens from the input, taking one more
  123%	each time on backtracking. This code is normally followed by a
  124%	test for a delimiter.  For example:
  125%
  126%	==
  127%	upto_colon(Atom) -->
  128%		string(Codes), ":", !,
  129%		{ atom_codes(Atom, Codes) }.
  130%	==
  131%
  132%	@see string_without//2.
  133
  134string([]) -->
  135	[].
  136string([H|T]) -->
  137	[H],
  138	string(T).
  139
  140%%	blanks// is det.
  141%
  142%	Skip zero or more white-space characters.
  143
  144blanks -->
  145	blank, !,
  146	blanks.
  147blanks -->
  148	[].
  149
  150%%	blank// is semidet.
  151%
  152%	Take next =space= character from input. Space characters include
  153%	newline.
  154%
  155%	@see white//0
  156
  157blank -->
  158	[C],
  159	{ nonvar(C),
  160	  code_type(C, space)
  161	}.
  162
  163%%	nonblanks(-Codes)// is det.
  164%
  165%	Take all =graph= characters
  166
  167nonblanks([H|T]) -->
  168	[H],
  169	{ code_type(H, graph)
  170	}, !,
  171	nonblanks(T).
  172nonblanks([]) -->
  173	[].
  174
  175%%	nonblank(-Code)// is semidet.
  176%
  177%	Code is the next non-blank (=graph=) character.
  178
  179nonblank(H) -->
  180	[H],
  181	{ code_type(H, graph)
  182	}.
  183
  184%%	blanks_to_nl// is semidet.
  185%
  186%	Take a sequence of blank//0 codes if blanks are followed by a
  187%	newline or end of the input.
  188
  189blanks_to_nl -->
  190	"\n", !.
  191blanks_to_nl -->
  192	blank, !,
  193	blanks_to_nl.
  194blanks_to_nl -->
  195	eos.
  196
  197%%	whites// is det.
  198%
  199%	Skip white space _inside_ a line.
  200%
  201%	@see blanks//0 also skips newlines.
  202
  203whites -->
  204	white, !,
  205	whites.
  206whites -->
  207	[].
  208
  209%%	white// is semidet.
  210%
  211%	Take next =white= character from input. White characters do
  212%	_not_ include newline.
  213
  214white -->
  215	[C],
  216	{ nonvar(C),
  217	  code_type(C, white)
  218	}.
  219
  220
  221		 /*******************************
  222		 *	 CHARACTER STUFF	*
  223		 *******************************/
  224
  225%%	alpha_to_lower(?C)// is semidet.
  226%
  227%	Read a letter (class  =alpha=)  and   return  it  as a lowercase
  228%	letter. If C is instantiated and the  DCG list is already bound,
  229%	C must be =lower= and matches both a lower and uppercase letter.
  230%	If the output list is unbound, its first element is bound to C.
  231%	For example:
  232%
  233%	  ==
  234%	  ?- alpha_to_lower(0'a, `AB`, R).
  235%	  R = [66].
  236%	  ?- alpha_to_lower(C, `AB`, R).
  237%	  C = 97, R = [66].
  238%	  ?- alpha_to_lower(0'a, L, R).
  239%	  L = [97|R].
  240%	  ==
  241
  242alpha_to_lower(L) -->
  243	[C],
  244	{   nonvar(C)
  245	->  code_type(C, alpha),
  246	    code_type(C, to_upper(L))
  247	;   L = C
  248	}.
  249
  250
  251		 /*******************************
  252		 *	      NUMBERS		*
  253		 *******************************/
  254
  255%%	digits(?Chars)// is det.
  256%%	digit(?Char)// is det.
  257%%	integer(?Integer)// is det.
  258%
  259%	Number processing. The predicate  digits//1   matches a possibly
  260%	empty set of digits,  digit//1  processes   a  single  digit and
  261%	integer processes an  optional  sign   followed  by  a non-empty
  262%	sequence of digits into an integer.
  263
  264digits([H|T]) -->
  265	digit(H), !,
  266	digits(T).
  267digits([]) -->
  268	[].
  269
  270digit(C) -->
  271	[C],
  272	{ code_type(C, digit)
  273	}.
  274
  275integer(I, Head, Tail) :-
  276	nonvar(I), !,
  277	format(codes(Head, Tail), '~d', [I]).
  278integer(I) -->
  279	int_codes(Codes),
  280	{ number_codes(I, Codes)
  281	}.
  282
  283int_codes([C,D0|D]) -->
  284	sign(C), !,
  285	digit(D0),
  286	digits(D).
  287int_codes([D0|D]) -->
  288	digit(D0),
  289	digits(D).
  290
  291
  292%%	float(?Float)// is det.
  293%
  294%	Process a floating  point  number.   The  actual  conversion  is
  295%	controlled by number_codes/2.
  296
  297float(F, Head, Tail) :-
  298	float(F), !,
  299	with_output_to(codes(Head, Tail), write(F)).
  300float(F) -->
  301	number(F),
  302	{ float(F) }.
  303
  304%%	number(+Number)// is det.
  305%%	number(-Number)// is semidet.
  306%
  307%	Generate extract a number. Handles   both  integers and floating
  308%	point numbers.
  309
  310number(N, Head, Tail) :-
  311	number(N), !,
  312	format(codes(Head, Tail), '~w', N).
  313number(N) -->
  314	{ var(N)
  315	},
  316	!,
  317	int_codes(I),
  318	(   dot,
  319	    digit(DF0),
  320	    digits(DF)
  321	->  {F = [0'., DF0|DF]}
  322	;   {F = []}
  323	),
  324	(   exp
  325	->  int_codes(DI),
  326	    {E=[0'e|DI]}
  327	;   {E = []}
  328	),
  329	{ append([I, F, E], Codes),
  330	  number_codes(N, Codes)
  331	}.
  332number(N) -->
  333	{ type_error(number, N) }.
  334
  335sign(0'-) --> "-".
  336sign(0'+) --> "+".
  337
  338dot --> ".".
  339
  340exp --> "e".
  341exp --> "E".
  342
  343		 /*******************************
  344		 *	    HEX NUMBERS		*
  345		 *******************************/
  346
  347%%	xinteger(+Integer)// is det.
  348%%	xinteger(-Integer)// is semidet.
  349%
  350%	Generate or extract an integer from   a  sequence of hexadecimal
  351%	digits. Hexadecimal characters include both  uppercase (A-F) and
  352%	lowercase (a-f) letters. The value may   be  preceded by  a sign
  353%	(+/-)
  354
  355xinteger(Val, Head, Tail) :-
  356	integer(Val), !,
  357	format(codes(Head, Tail), '~16r', [Val]).
  358xinteger(Val) -->
  359	sign(C), !,
  360	xdigit(D0),
  361	xdigits(D),
  362	{ mkval([D0|D], 16, Val0),
  363	  (   C == 0'-
  364	  ->  Val is -Val0
  365	  ;   Val = Val0
  366	  )
  367	}.
  368xinteger(Val) -->
  369	xdigit(D0),
  370	xdigits(D),
  371	{ mkval([D0|D], 16, Val)
  372	}.
  373
  374%%	xdigit(-Weight)// is semidet.
  375%
  376%	True if the next code is a  hexdecimal digit with Weight. Weight
  377%	is  between  0  and  15.  Hexadecimal  characters  include  both
  378%	uppercase (A-F) and lowercase (a-f) letters.
  379
  380xdigit(D) -->
  381	[C],
  382	{ code_type(C, xdigit(D))
  383	}.
  384
  385%%	xdigits(-WeightList)// is det.
  386%
  387%	List of weights of a sequence   of hexadecimal codes. WeightList
  388%	may be empty. Hexadecimal  characters   include  both  uppercase
  389%	(A-F) and lowercase (a-f) letters.
  390
  391xdigits([D0|D]) -->
  392	xdigit(D0), !,
  393	xdigits(D).
  394xdigits([]) -->
  395	[].
  396
  397mkval([W0|Weights], Base, Val) :-
  398	mkval(Weights, Base, W0, Val).
  399
  400mkval([], _, W, W).
  401mkval([H|T], Base, W0, W) :-
  402	W1 is W0*Base+H,
  403	mkval(T, Base, W1, W).
  404
  405
  406		 /*******************************
  407		 *	   END-OF-STRING	*
  408		 *******************************/
  409
  410%!	eol//
  411%
  412%	Matches end-of-line. Matching \r\n, \n or end of input (eos//0).
  413
  414eol --> "\n", !.
  415eol --> "\r\n", !.
  416eol --> eos.
  417
  418%%	eos//
  419%
  420%	Matches  end-of-input.  The  implementation    behaves   as  the
  421%	following portable implementation:
  422%
  423%	  ==
  424%	  eos --> call(eos_).
  425%	  eos_([], []).
  426%	  ==
  427%
  428%	@tbd	This is a difficult concept and violates the _context free_
  429%		property of DCGs.  Explain the exact problems.
  430
  431eos([], []).
  432
  433%%	remainder(-List)//
  434%
  435%	Unify List with the remainder of the input.
  436
  437remainder(List, List, []).
  438
  439
  440		 /*******************************
  441		 *	   PROLOG SYNTAX		*
  442		 *******************************/
  443
  444%%	prolog_var_name(-Name:atom)// is semidet.
  445%
  446%	Matches a Prolog variable name. Primarily  intended to deal with
  447%	quasi quotations that embed Prolog variables.
  448
  449prolog_var_name(Name) -->
  450	[C0], { code_type(C0, prolog_var_start) }, !,
  451	prolog_id_cont(CL),
  452	{ atom_codes(Name, [C0|CL]) }.
  453
  454prolog_id_cont([H|T]) -->
  455	[H], { code_type(H, prolog_identifier_continue) }, !,
  456	prolog_id_cont(T).
  457prolog_id_cont([]) --> "".
  458
  459
  460		 /*******************************
  461		 *	     GENERATION		*
  462		 *******************************/
  463
  464%%	atom(++Atom)// is det.
  465%
  466%	Generate codes of Atom.  Current implementation uses write/1,
  467%	dealing with any Prolog term.  Atom must be ground though.
  468
  469atom(Atom, Head, Tail) :-
  470	must_be(ground, Atom),
  471	format(codes(Head, Tail), '~w', [Atom])