/*  Part of SWI-Prolog

    Author:        Jan Wielemaker
    E-mail:        J.Wielemaker@vu.nl
    WWW:           http://www.swi-prolog.org
    Copyright (c)  2013-2022, University of Amsterdam
                              VU University Amsterdam
                              SWI-Prolog Solutions b.v.
    All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

    1. Redistributions of source code must retain the above copyright
       notice, this list of conditions and the following disclaimer.

    2. Redistributions in binary form must reproduce the above copyright
       notice, this list of conditions and the following disclaimer in
       the documentation and/or other materials provided with the
       distribution.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    POSSIBILITY OF SUCH DAMAGE.
*/

% Public interface of the Turtle reader.  The rdf_save_* predicates are
% listed here as re-exports; this file does not define them.
:- module(turtle,
          [ rdf_load_turtle/3,                  % +Input, -Triples, +Options
            rdf_read_turtle/3,                  % +Input, -Triples, +Options
            rdf_process_turtle/3,               % +Input, :OnObject, +Options
                                                % re-exports
            rdf_save_turtle/2,                  % +File, +Options
            rdf_save_canonical_turtle/2,        % +File, +Options
            rdf_save_trig/2,                    % +File, +Options
            rdf_save_canonical_trig/2,          % +File, +Options
            rdf_save_ntriples/2                 % +File, +Options
          ]).
:- use_module(library(semweb/rdf_turtle_write)). % re-exports
% rdf_db is optional: only import the database predicates when the
% library is actually installed.
:- if(exists_source(library(semweb/rdf_db))).
:- use_module(library(semweb/rdf_db),
              [rdf_transaction/2,rdf_set_graph/2,rdf_assert/4]).
:- endif.

:- autoload(library(memfile),
            [atom_to_memory_file/2,open_memory_file/4]).
:- autoload(library(option),[option/3,option/2]).
:- autoload(library(uri),
            [uri_file_name/2,uri_is_global/1,uri_normalized/2]).
:- autoload(library(http/http_open),[http_open/3]).

% re-exports
% OnObject is called with two additional arguments
% (ListOfTriples and Graph:Line), hence the meta-argument spec `2`.
:- meta_predicate
    rdf_process_turtle(+,2,+).

:- predicate_options(rdf_load_turtle/3, 3,
                     [pass_to(rdf_read_turtle/3, 3)]).
:- predicate_options(rdf_process_turtle/3, 3,
                     [ anon_prefix(atom),
                       base_uri(atom),
                       base_used(-atom),
                       db(atom),
                       error_count(-integer),
                       namespaces(-list),
                       on_error(oneof([warning,error])),
                       prefixes(-list),
                       resources(oneof([uri,iri]))
                     ]).
:- predicate_options(rdf_read_turtle/3, 3,
                     [ anon_prefix(atom),
                       base_uri(atom),
                       base_used(-atom),
                       db(atom),
                       error_count(-integer),
                       namespaces(-list),
                       on_error(oneof([warning,error])),
                       prefixes(-list),
                       resources(oneof([uri,iri]))
                     ]).

:- use_foreign_library(foreign(turtle)).
:- public                               % used by the writer
    turtle_pn_local/1,
    turtle_write_quoted_string/2,
    turtle_write_uri/2.

/** <module> Turtle: Terse RDF Triple Language

This module implements the Turtle  language  for  representing  the  RDF
triple model as defined by Dave Beckett  from the Institute for Learning
and Research Technology University of Bristol  and later standardized by
the W3C RDF working group.

This module acts as a plugin to   rdf_load/2,  for processing files with
one of the extensions =|.ttl|= or =|.n3|=.

@see    http://www.w3.org/TR/turtle/ (used W3C Recommendation 25
        February 2014)
*/
%!  rdf_read_turtle(+Input, -Triples, +Options)
%
%   Read a stream or file into a set of triples or quadruples (if
%   faced with TriG input) of the format
%
%           rdf(Subject, Predicate, Object [, Graph])
%
%   The representation is consistent with the SWI-Prolog RDF/XML
%   and ntriples parsers.  Provided options are:
%
%           * base_uri(+BaseURI)
%           Initial base URI.  Defaults to file://<file> for loading
%           files.
%
%           * anon_prefix(+Prefix)
%           Blank nodes are generated as <Prefix>1, <Prefix>2, etc.
%           If Prefix is not an atom blank nodes are generated as
%           node(1), node(2), ...
%
%           * format(+Format)
%           One of =auto= (default), =turtle= or =trig=.  The
%           auto mode switches to TriG format if there is a
%           =|{|= before the first triple.  Finally, if the
%           format is explicitly stated as =turtle= and the
%           file appears to be a TriG file, a warning is printed
%           and the data is loaded while ignoring the graphs.
%
%           * resources(URIorIRI)
%           Officially, Turtle resources are IRIs.  Quite a
%           few applications however send URIs.  By default we
%           do URI->IRI mapping because this rarely causes errors.
%           To force strictly conforming mode, pass =iri=.
%
%           * prefixes(-Pairs)
%           Return encountered prefix declarations as a
%           list of Alias-URI
%
%           * namespaces(-Pairs)
%           Same as prefixes(Pairs).  Compatibility to rdf_load/2.
%
%           * base_used(-Base)
%           Base URI used for processing the data.  Unified to
%           [] if there is no base-uri.
%
%           * on_error(+ErrorMode)
%           In =warning= (default), print the error and continue
%           parsing the remainder of the file.  If =error=, abort
%           with an exception on the first error encountered.
%
%           * error_count(-Count)
%           If on_error(warning) is active, this option can be
%           used to retrieve the number of generated errors.
%
%   @param  Input is one of stream(Stream), atom(Atom), a =http=,
%           =https= or =file= url or a filename specification as
%           accepted by absolute_file_name/3.

rdf_read_turtle(In, Triples, Options) :-
    base_uri(In, BaseURI, Options),
    setup_call_cleanup(
        % Setup: open the input and create a parser on it.  The computed
        % BaseURI is put in front of the user options.
        ( open_input(In, Stream, Close),
          create_turtle_parser(Parser, Stream,
                               [ base_uri(BaseURI)
                               | Options
                               ])
        ),
        % Parse the whole document in one go, then unify the output
        % options (prefixes, base_used, error_count, ...).
        ( turtle_parse(Parser, Triples,
                       [ parse(document)
                       | Options
                       ]),
          post_options(Parser, Options)
        ),
        % Cleanup: always discard the parser and run the Close goal
        % returned by open_input/3.
        ( destroy_turtle_parser(Parser),
          call(Close)
        )).

%!  rdf_load_turtle(+Input, -Triples, +Options)
%
%   Same as rdf_read_turtle/3; kept for backward compatibility.
%
%   @deprecated Use rdf_read_turtle/3

rdf_load_turtle(Input, Triples, Options) :-
    rdf_read_turtle(Input, Triples, Options).


%!  rdf_process_turtle(+Input, :OnObject, +Options) is det.
%
%   Streaming Turtle parser.  The predicate rdf_process_turtle/3
%   processes Turtle data from Input, calling OnObject with a list
%   of triples for every Turtle _statement_ found in Input. OnObject
%   is called as below, where `ListOfTriples` is a list of
%   rdf(S,P,O) terms for a normal Turtle file or rdf(S,P,O,G) terms
%   if the =GRAPH= keyword is used to associate a set of triples in
%   the document with a particular graph.  The `Graph` argument
%   provides the default graph for storing the triples and _Line_ is
%   the line number where the statement started.
%
%   ==
%   call(OnObject, ListOfTriples, Graph:Line)
%   ==
%
%   This predicate supports the same Options as rdf_load_turtle/3.
%
%   Errors encountered are sent to print_message/2, after which the
%   parser tries to recover and parse the remainder of the data.
%
%   @see  This predicate is normally used by load_rdf/2 for
%         processing RDF data.

rdf_process_turtle(In, OnObject, Options) :-
    base_uri(In, BaseURI, Options),
    % The graph defaults to the base URI unless graph(Graph) is given.
    option(graph(Graph), Options, BaseURI),
    % NOTE(review): unlike rdf_read_turtle/3, BaseURI is not passed to
    % create_turtle_parser/3 here; it only provides the default graph.
    % Confirm this is intended.
    setup_call_cleanup(
        ( open_input(In, Stream, Close),
          create_turtle_parser(Parser, Stream, Options)
        ),
        ( process_turtle(Parser, Stream, OnObject, Graph,
                         [ parse(statement)
                         ]),
          post_options(Parser, Options)
        ),
        ( destroy_turtle_parser(Parser),
          call(Close)
        )).

%   post_options(+Parser, +Options)
%
%   Handle the output options after parsing: prefixes(-Pairs),
%   namespaces(-Pairs), base_used(-Base) and error_count(-Count) are
%   each unified from the parser if they appear in Options.

post_options(Parser, Options) :-
    prefix_option(Parser, Options),
    namespace_option(Parser, Options),
    base_option(Parser, Options),
    error_option(Parser, Options).

% prefixes(-Pairs): requested via turtle_prefixes/2.
prefix_option(Parser, Options) :-
    (   option(prefixes(Pairs), Options)
    ->  turtle_prefixes(Parser, Pairs)
    ;   true
    ).
% namespaces(-Pairs): same source as prefixes(Pairs).
namespace_option(Parser, Options) :-
    (   option(namespaces(Pairs), Options)
    ->  turtle_prefixes(Parser, Pairs)
    ;   true
    ).
% base_used(-Base): requested via turtle_base/2.
base_option(Parser, Options) :-
    (   option(base_used(Base), Options)
    ->  turtle_base(Parser, Base)
    ;   true
    ).
% error_count(-Count): requested via turtle_error_count/2.
error_option(Parser, Options) :-
    (   option(error_count(Count), Options)
    ->  turtle_error_count(Parser, Count)
    ;   true
    ).


%   process_turtle(+Parser, +Stream, :OnObject, +Graph, +Options)
%
%   Parse Stream one statement at a time until end of stream.  For
%   each statement, OnObject is called with the list of triples and
%   Graph:LineNo, where LineNo is the line count of the input read
%   just before the statement is parsed.

process_turtle(_Parser, Stream, _OnObject, _Graph, _Options) :-
    at_end_of_stream(Stream),
    !.
process_turtle(Parser, Stream, OnObject, Graph, Options) :-
    % Stream may be a stream pair; take the line count from its
    % input side.
    stream_pair(Stream, In, _),
    line_count(In, LineNo),
    turtle_parse(Parser, Triples,
                 [ parse(statement)
                 | Options
                 ]),
    call(OnObject, Triples, Graph:LineNo),
    process_turtle(Parser, Stream, OnObject, Graph, Options).


%!  open_input(+Input, -Stream, -Close) is det.
%
%   Open given input.
%
%   @param  Close goal to undo the open action
%   @tbd    Synchronize with input handling of rdf_db.pl.
%   @error  existence_error, permission_error

% stream(Stream): use the caller's stream.  It is left untouched
% (Close = true) if its encoding is one of unicode_encoding/1 or if it
% is a text stream; otherwise it is temporarily switched to UTF-8 and
% Close restores the old encoding.
open_input(stream(Stream), Stream, Close) :-
    !,
    stream_property(Stream, encoding(Old)),
    (   (   unicode_encoding(Old)
        ;   stream_property(Stream, type(text))
        )
    ->  Close = true
    ;   set_stream(Stream, encoding(utf8)),
        Close = set_stream(Stream, encoding(Old))
    ).
% A plain stream handle is treated as stream(Stream).
open_input(Stream, Stream, Close) :-
    is_stream(Stream),
    !,
    open_input(stream(Stream), Stream, Close).
% atom(Atom): read from the text of Atom through a memory file that is
% freed when the stream is closed.
open_input(atom(Atom), Stream, close(Stream)) :-
    !,
    atom_to_memory_file(Atom, MF),
    open_memory_file(MF, read, Stream, [free_on_close(true)]).
% http:// and https:// URLs are fetched with http_open/3 and read as
% UTF-8.
open_input(URL, Stream, close(Stream)) :-
    (   sub_atom(URL, 0, _, _, 'http://')
    ;   sub_atom(URL, 0, _, _, 'https://')
    ),
    !,
    http_open(URL, Stream, []),
    set_stream(Stream, encoding(utf8)).
% file:// URLs are mapped to a local path.
open_input(URL, Stream, close(Stream)) :-
    uri_file_name(URL, Path),
    !,
    open(Path, read, Stream, [encoding(utf8)]).
% Anything else is a file specification, resolved with
% absolute_file_name/3 (trying the extension `ttl` and no extension).
open_input(File, Stream, close(Stream)) :-
    absolute_file_name(File, Path,
                       [ access(read),
                         extensions([ttl, ''])
                       ]),
    open(Path, read, Stream, [encoding(utf8)]).

% Encodings for which open_input/3 leaves a user stream as it is.
unicode_encoding(utf8).
unicode_encoding(wchar_t).
unicode_encoding(unicode_be).
unicode_encoding(unicode_le).

%!  base_uri(+Input, -BaseURI, +Options)
%
%   Determine the base uri to use for processing.  The first source
%   that applies wins: the option base_uri(BaseURI), the option
%   graph(BaseURI), the file name associated with an input stream,
%   an atomic input name, and finally =|http://www.example.com/|=.

base_uri(_Input, BaseURI, Options) :-
    option(base_uri(BaseURI), Options),
    !.
base_uri(_Input, BaseURI, Options) :-
    option(graph(BaseURI), Options),
    !.
base_uri(stream(Input), BaseURI, _Options) :-
    stream_property(Input, file_name(Name)),
    !,
    name_uri(Name, BaseURI).
base_uri(Stream, BaseURI, Options) :-
    is_stream(Stream),
    !,
    base_uri(stream(Stream), BaseURI, Options).
base_uri(Name, BaseURI, _Options) :-
    atom(Name),
    !,
    name_uri(Name, BaseURI).
base_uri(_, 'http://www.example.com/', _).

%   name_uri(+Name, -BaseURI)
%
%   Translate Name into a base URI.  If Name is a global URI it is
%   normalized; otherwise it is taken as a file name and BaseURI is
%   the URI that uri_file_name/2 relates to that file name.

name_uri(Name, BaseURI) :-
    uri_is_global(Name),
    !,
    uri_normalized(Name, BaseURI).
name_uri(Name, BaseURI) :-
    uri_file_name(BaseURI, Name).


                 /*******************************
                 *          WRITE SUPPORT       *
                 *******************************/

%!  turtle_pn_local(+Atom:atom) is semidet.
%
%   True if Atom is a  valid   Turtle  _PN_LOCAL_ name. The PN_LOCAL
%   name is what can follow the : in  a resource. In the new Turtle,
%   this can be anything and this   function becomes meaningless. In
%   the old turtle, PN_LOCAL is defined   similar (but not equal) to
%   an XML name. This predicate  is   used  by  rdf_save_turtle/2 to
%   write files that can be read by old parsers.
%
%   @see xml_name/2.

%!  turtle_write_quoted_string(+Out, +Value, ?WriteLong) is det.
%
%   Write Value (an atom)  as  a   valid  Turtle  string.  WriteLong
%   determines whether the string is written as a _short_ or _long_
%   string.  It takes the following values:
%
%     * true
%     Use Turtle's long string syntax. Embedded newlines and
%     single or double quotes are emitted verbatim.
%     * false
%     Use Turtle's short string syntax.
%     * Var
%     If WriteLong is unbound, this predicate uses long syntax
%     if newlines appear in the string and short otherwise.  WriteLong
%     is unified with the decision taken.

%!  turtle_write_quoted_string(+Out, +Value) is det.
%
%   Same as turtle_write_quoted_string(Out, Value, false), writing a
%   string with only a single =|"|=.   Embedded newlines are escaped
%   as =|\n|=.

turtle_write_quoted_string(Out, Text) :-
    turtle_write_quoted_string(Out, Text, false).

%!  turtle_write_uri(+Out, +Value) is det.
%
%   Write a URI as =|<...>|=


                 /*******************************
                 *          RDF-DB HOOK         *
                 *******************************/

% The hook is only compiled when rdf_transaction/2 is available, i.e.
% when library(semweb/rdf_db) was loaded by the conditional import.
:- if(current_predicate(rdf_transaction/2)).
:- multifile
    rdf_db:rdf_load_stream/3,
    rdf_db:rdf_file_type/2.

%!  rdf_db:rdf_load_stream(+Format, +Stream, :Options)
%
%   (Turtle clauses)

rdf_db:rdf_load_stream(turtle, Stream, Options) :-
    load_turtle_stream(Stream, Options).
rdf_db:rdf_load_stream(trig, Stream, Options) :-
    load_turtle_stream(Stream, Options).

%   load_turtle_stream(+Stream, :Options)
%
%   Load Turtle/TriG data from Stream into the RDF database inside a
%   single transaction.  Blank nodes get the prefix `_:<Graph>` and
%   the graph is flagged as not modified afterwards.

load_turtle_stream(Stream, _Module:Options) :-
    rdf_db:graph(Options, Graph),
    atom_concat('_:', Graph, BNodePrefix),
    rdf_transaction((  rdf_process_turtle(Stream, assert_triples,
                                          [ anon_prefix(BNodePrefix)
                                          | Options
                                          ]),
                       rdf_set_graph(Graph, modified(false))
                    ),
                    parse(Graph)).

%   assert_triples(+Triples, +Location)
%
%   Assert each element of Triples.  Location is the Graph:Line term
%   passed by rdf_process_turtle/3.

assert_triples([], _).
assert_triples([H|T], Location) :-
    assert_triple(H, Location),
    assert_triples(T, Location).

% rdf/3 terms are stored with Location as graph argument; rdf/4 terms
% carry their own graph G, which is used instead.
assert_triple(rdf(S,P,O), Location) :-
    rdf_assert(S,P,O,Location).
assert_triple(rdf(S,P,O,G), _) :-
    rdf_assert(S,P,O,G).


rdf_db:rdf_file_type(ttl,  turtle).
rdf_db:rdf_file_type(n3,   turtle).     % not really, but good enough
rdf_db:rdf_file_type(trig, trig).
:- endif.


                 /*******************************
                 *             MESSAGES         *
                 *******************************/

:- multifile prolog:error_message//1.

% Message for a reference to the empty prefix (`:`) when it has not
% been declared.
prolog:error_message(existence_error(turtle_prefix, '')) -->
    [ 'Turtle empty prefix (:) is not defined' ].