View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2012-2021, University of Amsterdam
    7                              VU University Amsterdam
    8			      SWI-Prolog Solutions b.v.
    9    All rights reserved.
   10
   11    Redistribution and use in source and binary forms, with or without
   12    modification, are permitted provided that the following conditions
   13    are met:
   14
   15    1. Redistributions of source code must retain the above copyright
   16       notice, this list of conditions and the following disclaimer.
   17
   18    2. Redistributions in binary form must reproduce the above copyright
   19       notice, this list of conditions and the following disclaimer in
   20       the documentation and/or other materials provided with the
   21       distribution.
   22
   23    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   24    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   25    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   26    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   27    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   28    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   29    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   30    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   31    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   33    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   34    POSSIBILITY OF SUCH DAMAGE.
   35*/
   36
   37:- module(dcg_basics,
   38	  [ white//0,			% <white inside line>
   39	    whites//0,			% <white inside line>*
   40	    blank//0,			% <blank>
   41	    blanks//0,			% <blank>*
   42	    nonblank//1,		% <nonblank>
   43	    nonblanks//1,		% <nonblank>* --> chars		(long)
   44	    blanks_to_nl//0,		% [space,tab,ret]*nl
   45	    string//1,			% <any>* -->chars		(short)
   46	    string_without//2,		% Exclude, -->chars		(long)
   47					% Characters
   48	    alpha_to_lower//1,		% Get lower|upper, return lower
   49					% Decimal numbers
   50	    digits//1,			% [0-9]* -->chars
   51	    digit//1,			% [0-9] --> char
   52	    integer//1,			% [+-][0-9]+ --> integer
   53	    float//1,			% [+-]?[0-9]+(.[0-9]*)?(e[+-]?[0-9]+)? --> float
   54	    number//1,			% integer | float
   55					% Hexadecimal numbers
   56	    xdigits//1,			% [0-9A-Fa-f]* --> 0-15*
   57	    xdigit//1,			% [0-9A-Fa-f] --> 0-15
   58	    xinteger//1,		% [0-9A-Fa-f]+ --> integer
   59
   60	    prolog_var_name//1,		% Read a Prolog variable name
   61
   62	    eol//0,			% End of line
   63	    eos//0,			% Test end of input.
   64	    remainder//1,		% -List
   65
   66					% generation (TBD)
   67	    atom//1			% generate atom
   68	  ]).   69:- use_module(library(lists)).   70:- use_module(library(error)).

Various general DCG utilities

This library provides various commonly used DCG primitives acting on list of character codes. Character classification is based on code_type/2.

This module started its life as library(http/dcg_basics) to support the HTTP protocol. Since then, it was increasingly used in code that has no relation to HTTP and therefore this library was moved to the core library.

To be done
- This is just a starting point. We need a comprehensive set of generally useful DCG primitives. */
 string_without(+EndCodes, -Codes)// is det
Take as many codes from the input until the next character code appears in the list EndCodes. The terminating code itself is left on the input. Typical use is to read upto a defined delimiter such as a newline or other reserved character. For example:
    ...,
    string_without("\n", RestOfLine)
Arguments:
EndCodes- is a list of character codes.
See also
- string//1.
  104string_without(End, Codes) -->
  105	{ string(End), !,
  106	  string_codes(End, EndCodes)
  107	},
  108	list_string_without(EndCodes, Codes).
  109string_without(End, Codes) -->
  110	list_string_without(End, Codes).
  111
  112list_string_without(Not, [C|T]) -->
  113	[C],
  114	{ \+ memberchk(C, Not)
  115	}, !,
  116	list_string_without(Not, T).
  117list_string_without(_, []) -->
  118	[].
 string(-Codes)// is nondet
Take as few as possible tokens from the input, taking one more each time on backtracking. This code is normally followed by a test for a delimiter. For example:
upto_colon(Atom) -->
        string(Codes), ":", !,
        { atom_codes(Atom, Codes) }.
See also
- string_without//2.
  134string([]) -->
  135	[].
  136string([H|T]) -->
  137	[H],
  138	string(T).
 blanks// is det
Skip zero or more white-space characters.
  144blanks -->
  145	blank, !,
  146	blanks.
  147blanks -->
  148	[].
 blank// is semidet
Take next space character from input. Space characters include newline.
See also
- white//0
  157blank -->
  158	[C],
  159	{ nonvar(C),
  160	  code_type(C, space)
  161	}.
 nonblanks(-Codes)// is det
Take all graph characters
  167nonblanks([H|T]) -->
  168	[H],
  169	{ code_type(H, graph)
  170	}, !,
  171	nonblanks(T).
  172nonblanks([]) -->
  173	[].
 nonblank(-Code)// is semidet
Code is the next non-blank (graph) character.
  179nonblank(H) -->
  180	[H],
  181	{ code_type(H, graph)
  182	}.
 blanks_to_nl// is semidet
Take a sequence of blank//0 codes if blanks are followed by a newline or end of the input.
  189blanks_to_nl -->
  190	"\n", !.
  191blanks_to_nl -->
  192	blank, !,
  193	blanks_to_nl.
  194blanks_to_nl -->
  195	eos.
 whites// is det
Skip white space inside a line.
See also
- blanks//0 also skips newlines.
  203whites -->
  204	white, !,
  205	whites.
  206whites -->
  207	[].
 white// is semidet
Take next white character from input. White characters do not include newline.
  214white -->
  215	[C],
  216	{ nonvar(C),
  217	  code_type(C, white)
  218	}.
  219
  220
  221		 /*******************************
  222		 *	 CHARACTER STUFF	*
  223		 *******************************/
 alpha_to_lower(?C)// is semidet
Read a letter (class alpha) and return it as a lowercase letter. If C is instantiated and the DCG list is already bound, C must be lower and matches both a lower and uppercase letter. If the output list is unbound, its first element is bound to C. For example:
?- alpha_to_lower(0'a, `AB`, R).
R = [66].
?- alpha_to_lower(C, `AB`, R).
C = 97, R = [66].
?- alpha_to_lower(0'a, L, R).
L = [97|R].
  242alpha_to_lower(L) -->
  243	[C],
  244	{   nonvar(C)
  245	->  code_type(C, alpha),
  246	    code_type(C, to_upper(L))
  247	;   L = C
  248	}.
  249
  250
  251		 /*******************************
  252		 *	      NUMBERS		*
  253		 *******************************/
 digits(?Chars)// is det
 digit(?Char)// is det
 integer(?Integer)// is det
Number processing. The predicate digits//1 matches a possibly empty set of digits, digit//1 processes a single digit and integer processes an optional sign followed by a non-empty sequence of digits into an integer.
  264digits([H|T]) -->
  265	digit(H), !,
  266	digits(T).
  267digits([]) -->
  268	[].
  269
  270digit(C) -->
  271	[C],
  272	{ code_type(C, digit)
  273	}.
  274
  275integer(I, Head, Tail) :-
  276	nonvar(I), !,
  277	format(codes(Head, Tail), '~d', [I]).
  278integer(I) -->
  279	int_codes(Codes),
  280	{ number_codes(I, Codes)
  281	}.
  282
  283int_codes([C,D0|D]) -->
  284	sign(C), !,
  285	digit(D0),
  286	digits(D).
  287int_codes([D0|D]) -->
  288	digit(D0),
  289	digits(D).
 float(?Float)// is det
Process a floating point number. The actual conversion is controlled by number_codes/2.
  297float(F, Head, Tail) :-
  298	float(F), !,
  299	with_output_to(codes(Head, Tail), write(F)).
  300float(F) -->
  301	number(F),
  302	{ float(F) }.
 number(+Number)// is det
number(-Number)// is semidet
Generate extract a number. Handles both integers and floating point numbers.
  310number(N, Head, Tail) :-
  311	number(N), !,
  312	format(codes(Head, Tail), '~w', N).
  313number(N) -->
  314	{ var(N)
  315	},
  316	!,
  317	int_codes(I),
  318	(   dot,
  319	    digit(DF0),
  320	    digits(DF)
  321	->  {F = [0'., DF0|DF]}
  322	;   {F = []}
  323	),
  324	(   exp
  325	->  int_codes(DI),
  326	    {E=[0'e|DI]}
  327	;   {E = []}
  328	),
  329	{ append([I, F, E], Codes),
  330	  number_codes(N, Codes)
  331	}.
  332number(N) -->
  333	{ type_error(number, N) }.
  334
  335sign(0'-) --> "-".
  336sign(0'+) --> "+".
  337
  338dot --> ".".
  339
  340exp --> "e".
  341exp --> "E".
  342
  343		 /*******************************
  344		 *	    HEX NUMBERS		*
  345		 *******************************/
 xinteger(+Integer)// is det
xinteger(-Integer)// is semidet
Generate or extract an integer from a sequence of hexadecimal digits. Hexadecimal characters include both uppercase (A-F) and lowercase (a-f) letters. The value may be preceded by a sign (+/-)
  355xinteger(Val, Head, Tail) :-
  356	integer(Val), !,
  357	format(codes(Head, Tail), '~16r', [Val]).
  358xinteger(Val) -->
  359	sign(C), !,
  360	xdigit(D0),
  361	xdigits(D),
  362	{ mkval([D0|D], 16, Val0),
  363	  (   C == 0'-
  364	  ->  Val is -Val0
  365	  ;   Val = Val0
  366	  )
  367	}.
  368xinteger(Val) -->
  369	xdigit(D0),
  370	xdigits(D),
  371	{ mkval([D0|D], 16, Val)
  372	}.
 xdigit(-Weight)// is semidet
True if the next code is a hexdecimal digit with Weight. Weight is between 0 and 15. Hexadecimal characters include both uppercase (A-F) and lowercase (a-f) letters.
  380xdigit(D) -->
  381	[C],
  382	{ code_type(C, xdigit(D))
  383	}.
 xdigits(-WeightList)// is det
List of weights of a sequence of hexadecimal codes. WeightList may be empty. Hexadecimal characters include both uppercase (A-F) and lowercase (a-f) letters.
  391xdigits([D0|D]) -->
  392	xdigit(D0), !,
  393	xdigits(D).
  394xdigits([]) -->
  395	[].
  396
  397mkval([W0|Weights], Base, Val) :-
  398	mkval(Weights, Base, W0, Val).
  399
  400mkval([], _, W, W).
  401mkval([H|T], Base, W0, W) :-
  402	W1 is W0*Base+H,
  403	mkval(T, Base, W1, W).
  404
  405
  406		 /*******************************
  407		 *	   END-OF-STRING	*
  408		 *******************************/
 eol//
Matches end-of-line. Matching \r\n, \n or end of input (eos//0).
  414eol --> "\n", !.
  415eol --> "\r\n", !.
  416eol --> eos.
 eos//
Matches end-of-input. The implementation behaves as the following portable implementation:
eos --> call(eos_).
eos_([], []).
To be done
- This is a difficult concept and violates the context free property of DCGs. Explain the exact problems.
  431eos([], []).
 remainder(-List)//
Unify List with the remainder of the input.
  437remainder(List, List, []).
  438
  439
  440		 /*******************************
  441		 *	   PROLOG SYNTAX		*
  442		 *******************************/
 prolog_var_name(-Name:atom)// is semidet
Matches a Prolog variable name. Primarily intended to deal with quasi quotations that embed Prolog variables.
  449prolog_var_name(Name) -->
  450	[C0], { code_type(C0, prolog_var_start) }, !,
  451	prolog_id_cont(CL),
  452	{ atom_codes(Name, [C0|CL]) }.
  453
  454prolog_id_cont([H|T]) -->
  455	[H], { code_type(H, prolog_identifier_continue) }, !,
  456	prolog_id_cont(T).
  457prolog_id_cont([]) --> "".
  458
  459
  460		 /*******************************
  461		 *	     GENERATION		*
  462		 *******************************/
 atom(++Atom)// is det
Generate codes of Atom. Current implementation uses write/1, dealing with any Prolog term. Atom must be ground though.
  469atom(Atom, Head, Tail) :-
  470	must_be(ground, Atom),
  471	format(codes(Head, Tail), '~w', [Atom])