1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2010-2020, University of Amsterdam 7 CWI, Amsterdam 8 All rights reserved. 9 10 Redistribution and use in source and binary forms, with or without 11 modification, are permitted provided that the following conditions 12 are met: 13 14 1. Redistributions of source code must retain the above copyright 15 notice, this list of conditions and the following disclaimer. 16 17 2. Redistributions in binary form must reproduce the above copyright 18 notice, this list of conditions and the following disclaimer in 19 the documentation and/or other materials provided with the 20 distribution. 21 22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 32 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 POSSIBILITY OF SUCH DAMAGE. 34*/ 35 36:- module(rdf_write, 37 [ rdf_write_xml/2 % +Stream, +Triples 38 ]). 39:- autoload(library(assoc), 40 [empty_assoc/1,put_assoc/4,assoc_to_keys/2,get_assoc/3]). 41:- autoload(library(debug),[assertion/1]). 42:- autoload(library(lists),[member/2,append/3,select/3]). 43:- autoload(library(sgml), 44 [xml_quote_attribute/3, xml_name/1, xml_quote_cdata/3, xml_is_dom/1]). 45:- autoload(library(sgml_write),[xml_write/3]). 46:- use_module(library(semweb/rdf_prefixes), 47 [rdf_global_id/2, rdf_register_ns/2, op(_,_,_)]). 48:- if(exists_source(library(semweb/rdf_db))). 49:- autoload(library(semweb/rdf_db), 50 [rdf_is_bnode/1, rdf_equal/2]). 51:- else. 52:- rdf_meta 53 rdf_equal(o,o). 54 55rdf_equal(X,X). 56rdf_is_bnode(Node) :- 57 atom(Node), 58 sub_atom(Node, 0, _, _, '_:'). 59 60lang_equal(X, Y) :- 61 downcase_atom(X, L), 62 downcase_atom(Y, L). 63 64 65:- endif.
89 /******************************* 90 * WRITE RDFXML * 91 *******************************/
97rdf_write_xml(Out, Triples) :- 98 sort(Triples, Unique), 99 rdf_write_header(Out, Unique), 100 node_id_map(Unique, AnonIDs), 101 rdf_write_triples(Unique, AnonIDs, Out), 102 rdf_write_footer(Out). 103 104 105 /******************************* 106 * HEADER/FOOTER * 107 *******************************/
114rdf_write_header(Out, Triples) :- 115 xml_encoding(Out, Enc, Encoding), 116 format(Out, '<?xml version=\'1.0\' encoding=\'~w\'?>~n', [Encoding]), 117 format(Out, '<!DOCTYPE rdf:RDF [', []), 118 used_namespaces(Triples, NSList), 119 ( member(Id, NSList), 120 ns(Id, NS), 121 xml_quote_attribute(NS, NSText0, Enc), 122 xml_escape_parameter_entity(NSText0, NSText), 123 format(Out, '~N <!ENTITY ~w \'~w\'>', [Id, NSText]), 124 fail 125 ; true 126 ), 127 format(Out, '~N]>~n~n', []), 128 format(Out, '<rdf:RDF', []), 129 ( member(Id, NSList), 130 format(Out, '~N xmlns:~w="&~w;"~n', [Id, Id]), 131 fail 132 ; true 133 ), 134 format(Out, '>~n', []). 135 136 137xml_encoding(Out, Enc, Encoding) :- 138 stream_property(Out, encoding(Enc)), 139 ( xml_encoding_name(Enc, Encoding) 140 -> true 141 ; throw(error(domain_error(rdf_encoding, Enc), _)) 142 ). 143 144xml_encoding_name(ascii, 'US-ASCII'). 145xml_encoding_name(iso_latin_1, 'ISO-8859-1'). 146xml_encoding_name(utf8, 'UTF-8').
152xml_escape_parameter_entity(In, Out) :- 153 sub_atom(In, _, _, _, '%'), 154 !, 155 atom_codes(In, Codes), 156 phrase(escape_parent(Codes), OutCodes), 157 atom_codes(Out, OutCodes). 158xml_escape_parameter_entity(In, In). 159 160escape_parent([]) --> []. 161escape_parent([H|T]) --> 162 ( { H == 37 } 163 -> "%" 164 ; [H] 165 ), 166 escape_parent(T).
173used_namespaces(Triples, NSList) :- 174 decl_used_predicate_ns(Triples), 175 resources(Triples, Resources), 176 empty_assoc(A0), 177 put_assoc(rdf, A0, *, A1), % needed for rdf:RDF 178 res_used_namespaces(Resources, _NoNS, A1, A), 179 assoc_to_keys(A, NSList). 180 181 182res_used_namespaces([], [], A, A). 183res_used_namespaces([Resource|T], NoNS, A0, A) :- 184 ns(NS, Full), 185 Full \== '', 186 atom_concat(Full, Local, Resource), 187 xml_name(Local), 188 !, 189 put_assoc(NS, A0, *, A1), 190 res_used_namespaces(T, NoNS, A1, A). 191res_used_namespaces([R|T0], [R|T], A0, A) :- 192 res_used_namespaces(T0, T, A0, A).
198resources(Triples, Resources) :- 199 phrase(resources(Triples), Raw), 200 sort(Raw, Resources). 201 202resources([]) --> 203 []. 204resources([rdf(S,P,O)|T]) --> 205 [S,P], 206 object_resources(O), 207 resources(T). 208 209object_resources(Atom) --> 210 { atom(Atom) }, 211 !, 212 [ Atom ]. 213object_resources(literal(type(Type, _))) --> 214 !, 215 [ Type ]. 216object_resources(_) --> 217 [].
224:- thread_local 225 predicate_ns/2. 226 227decl_used_predicate_ns(Triples) :- 228 retractall(predicate_ns(_,_)), 229 ( member(rdf(_,P,_), Triples), 230 decl_predicate_ns(P), 231 fail 232 ; true 233 ). 234 235decl_predicate_ns(Pred) :- 236 predicate_ns(Pred, _), 237 !. 238decl_predicate_ns(Pred) :- 239 rdf_global_id(NS:Local, Pred), 240 xml_name(Local), 241 !, 242 assert(predicate_ns(Pred, NS)). 243decl_predicate_ns(Pred) :- 244 is_bag_li_predicate(Pred), 245 !. 246decl_predicate_ns(Pred) :- 247 atom_codes(Pred, Codes), 248 append(NSCodes, LocalCodes, Codes), 249 xml_codes(LocalCodes), 250 !, 251 ( NSCodes \== [] 252 -> atom_codes(NS, NSCodes), 253 ( ns(Id, NS) 254 -> assert(predicate_ns(Pred, Id)) 255 ; between(1, infinite, N), 256 atom_concat(ns, N, Id), 257 \+ ns(Id, _) 258 -> rdf_register_ns(Id, NS), 259 print_message(informational, 260 rdf(using_namespace(Id, NS))) 261 ), 262 assert(predicate_ns(Pred, Id)) 263 ; assert(predicate_ns(Pred, -)) % no namespace used 264 ). 265 266xml_codes([]). 267xml_codes([H|T]) :- 268 xml_code(H), 269 xml_codes(T). 270 271xml_code(X) :- 272 code_type(X, csym), 273 !. 274xml_code(0'-). % ' 275 276 Out) (:- 278 format(Out, '</rdf:RDF>~n', []). 279 280 281 /******************************* 282 * ANONYMOUS IDS * 283 *******************************/
291node_id_map(Triples, IdMap) :- 292 anonymous_objects(Triples, Objs), 293 msort(Objs, Sorted), 294 empty_assoc(IdMap0), 295 nodeid_map(Sorted, 0, IdMap0, IdMap). 296 297anonymous_objects([], []). 298anonymous_objects([rdf(_,_,O)|T0], Anon) :- 299 rdf_is_bnode(O), 300 !, 301 Anon = [O|T], 302 anonymous_objects(T0, T). 303anonymous_objects([_|T0], T) :- 304 anonymous_objects(T0, T). 305 306nodeid_map([], _, Map, Map). 307nodeid_map([H,H|T0], Id, Map0, Map) :- 308 !, 309 remove_leading(H, T0, T), 310 atom_concat(bn, Id, NodeId), 311 put_assoc(H, Map0, NodeId, Map1), 312 Id2 is Id + 1, 313 nodeid_map(T, Id2, Map1, Map). 314nodeid_map([_|T], Id, Map0, Map) :- 315 nodeid_map(T, Id, Map0, Map). 316 317remove_leading(H, [H|T0], T) :- 318 !, 319 remove_leading(H, T0, T). 320remove_leading(_, T, T). 321 322 323 /******************************* 324 * TRIPLES * 325 *******************************/ 326 327rdf_write_triples(Triples, NodeIDs, Out) :- 328 rdf_write_triples(Triples, NodeIDs, Out, [], Anon), 329 rdf_write_anon(Anon, NodeIDs, Out, Anon). 330 331rdf_write_triples([], _, _, Anon, Anon). 332rdf_write_triples([H|T0], NodeIDs, Out, Anon0, Anon) :- 333 arg(1, H, S), 334 subject_triples(S, [H|T0], T, OnSubject), 335 ( rdf_is_bnode(S) 336 -> rdf_write_triples(T, NodeIDs, Out, [anon(S,_,OnSubject)|Anon0], Anon) 337 ; rdf_write_subject(OnSubject, S, NodeIDs, Out, Anon0), 338 rdf_write_triples(T, NodeIDs, Out, Anon0, Anon) 339 ). 340 341subject_triples(S, [H|T0], T, [H|M]) :- 342 arg(1, H, S), 343 !, 344 subject_triples(S, T0, T, M). 345subject_triples(_, T, T, []). 346 347 348rdf_write_anon([], _, _, _). 349rdf_write_anon([anon(Subject, Done, Triples)|T], NodeIDs, Out, Anon) :- 350 Done \== true, 351 !, 352 Done = true, 353 rdf_write_subject(Triples, Subject, NodeIDs, Out, Anon), 354 rdf_write_anon(T, NodeIDs, Out, Anon). 355rdf_write_anon([_|T], NodeIDs, Out, Anon) :- 356 rdf_write_anon(T, NodeIDs, Out, Anon). 357 358rdf_write_subject(Triples, Subject, NodeIDs, Out, Anon) :- 359 rdf_write_subject(Triples, Out, Subject, NodeIDs, -, 0, Anon), 360 !, 361 format(Out, '~n', []). 362rdf_write_subject(_, Subject, _, _, _) :- 363 throw(error(rdf_save_failed(Subject), 'Internal error')). 364 365rdf_write_subject(Triples, Out, Subject, NodeIDs, DefNS, Indent, Anon) :- 366 rdf_equal(rdf:type, RdfType), 367 select(rdf(_, RdfType,Type), Triples, Triples1), 368 \+ rdf_is_bnode(Type), 369 rdf_id(Type, DefNS, TypeId), 370 xml_is_name(TypeId), 371 !, 372 format(Out, '~*|<', [Indent]), 373 rdf_write_id(Out, TypeId), 374 save_about(Out, Subject, NodeIDs), 375 save_attributes(Triples1, DefNS, Out, NodeIDs, TypeId, Indent, Anon). 376rdf_write_subject(Triples, Out, Subject, NodeIDs, _DefNS, Indent, Anon) :- 377 format(Out, '~*|<rdf:Description', [Indent]), 378 save_about(Out, Subject, NodeIDs), 379 save_attributes(Triples, rdf, Out, NodeIDs, rdf:'Description', Indent, Anon). 380 381xml_is_name(_NS:Atom) :- 382 !, 383 xml_name(Atom). 384xml_is_name(Atom) :- 385 xml_name(Atom). 386 387save_about(Out, Subject, NodeIDs) :- 388 rdf_is_bnode(Subject), 389 !, 390 ( get_assoc(Subject, NodeIDs, NodeID) 391 -> format(Out,' rdf:nodeID="~w"', [NodeID]) 392 ; true 393 ). 394save_about(Out, Subject, _) :- 395 stream_property(Out, encoding(Encoding)), 396 rdf_value(Subject, QSubject, Encoding), 397 format(Out, ' rdf:about="~w"', [QSubject]), 398 !. 399save_about(_, _, _) :- 400 assertion(fail).
408save_attributes(Triples, DefNS, Out, NodeIDs, Element, Indent, Anon) :- 409 split_attributes(Triples, InTag, InBody), 410 SubIndent is Indent + 2, 411 save_attributes2(InTag, DefNS, tag, Out, NodeIDs, SubIndent, Anon), 412 ( InBody == [] 413 -> format(Out, '/>~n', []) 414 ; format(Out, '>~n', []), 415 save_attributes2(InBody, _, body, Out, NodeIDs, SubIndent, Anon), 416 format(Out, '~N~*|</~w>~n', [Indent, Element]) 417 ). 418 419% split_attributes(+Triples, -HeadAttrs, -BodyAttr) 420% 421% Split attribute (Name=Value) list into attributes for the head 422% and body. Attributes can only be in the head if they are literal 423% and appear only one time in the attribute list. 424 425split_attributes(Triples, HeadAttr, BodyAttr) :- 426 duplicate_attributes(Triples, Dupls, Singles), 427 simple_literal_attributes(Singles, HeadAttr, Rest), 428 append(Dupls, Rest, BodyAttr). 429 430% duplicate_attributes(+Attrs, -Duplicates, -Singles) 431% 432% Extract attributes that appear more than onces as we cannot 433% dublicate an attribute in the head according to the XML rules. 434 435duplicate_attributes([], [], []). 436duplicate_attributes([H|T], Dupls, Singles) :- 437 arg(2, H, Name), 438 named_attributes(Name, T, D, R), 439 D \== [], 440 append([H|D], Dupls2, Dupls), 441 !, 442 duplicate_attributes(R, Dupls2, Singles). 443duplicate_attributes([H|T], Dupls2, [H|Singles]) :- 444 duplicate_attributes(T, Dupls2, Singles). 445 446named_attributes(_, [], [], []) :- !. 447named_attributes(Name, [H|T], D, R) :- 448 ( arg(2, H, Name) 449 -> D = [H|DT], 450 named_attributes(Name, T, DT, R) 451 ; R = [H|RT], 452 named_attributes(Name, T, D, RT) 453 ). 454 455% simple_literal_attributes(+Attributes, -Inline, -Body) 456% 457% Split attributes for (literal) attributes to be used in the 458% begin-tag and ones that have to go into the body of the description. 459 460simple_literal_attributes([], [], []). 461simple_literal_attributes([H|TA], [H|TI], B) :- 462 in_tag_attribute(H), 463 !, 464 simple_literal_attributes(TA, TI, B). 465simple_literal_attributes([H|TA], I, [H|TB]) :- 466 simple_literal_attributes(TA, I, TB). 467 468in_tag_attribute(rdf(_,P,literal(Text))) :- 469 atom(Text), % may not have lang qualifier 470 atom_length(Text, Len), 471 Len < 60, 472 \+ is_bag_li_predicate(P). 473 474 475% save_attributes(+List, +DefNS, +TagOrBody, +Out, +NodeIDs, +Indent, +Anon) 476% 477% Save a list of attributes. 478 479save_attributes2([], _, _, _, _, _, _). 480save_attributes2([H|T], DefNS, Where, Out, NodeIDs, Indent, Anon) :- 481 save_attribute(Where, H, DefNS, Out, NodeIDs, Indent, Anon), 482 save_attributes2(T, DefNS, Where, Out, NodeIDs, Indent, Anon).
486save_attribute(tag, rdf(_, Name, literal(Value)), DefNS, Out, _, Indent, _Anon) :- 487 AttIndent is Indent + 2, 488 rdf_att_id(Name, DefNS, NameText), 489 stream_property(Out, encoding(Encoding)), 490 xml_quote_attribute(Value, QVal, Encoding), 491 format(Out, '~N~*|', [AttIndent]), 492 rdf_write_id(Out, NameText), 493 format(Out, '="~w"', [QVal]). 494save_attribute(body, rdf(_,Name,literal(Literal)), DefNS, Out, _, Indent, _) :- 495 !, 496 rdf_p_id(Name, DefNS, NameText), 497 format(Out, '~N~*|<', [Indent]), 498 rdf_write_id(Out, NameText), 499 ( Literal = lang(Lang, Value) 500 -> rdf_id(Lang, DefNS, LangText), 501 format(Out, ' xml:lang="~w">', [LangText]) 502 ; Literal = type(Type, Value) 503 -> ( rdf_equal(Type, rdf:'XMLLiteral') 504 -> write(Out, ' rdf:parseType="Literal">'), 505 Value = Literal 506 ; stream_property(Out, encoding(Encoding)), 507 rdf_value(Type, QVal, Encoding), 508 format(Out, ' rdf:datatype="~w">', [QVal]) 509 ) 510 ; atomic(Literal) 511 -> write(Out, '>'), 512 Value = Literal 513 ; write(Out, ' rdf:parseType="Literal">'), 514 Value = Literal 515 ), 516 save_attribute_value(Value, Out, Indent), 517 write(Out, '</'), rdf_write_id(Out, NameText), write(Out, '>'). 518save_attribute(body, rdf(_, Name, Value), DefNS, Out, NodeIDs, Indent, Anon) :- 519 rdf_is_bnode(Value), 520 !, 521 ( memberchk(anon(Value, Done, ValueTriples), Anon) 522 -> true 523 ; ValueTriples = [] 524 ), 525 rdf_p_id(Name, DefNS, NameText), 526 format(Out, '~N~*|<', [Indent]), 527 rdf_write_id(Out, NameText), 528 ( var(Done) 529 -> Done = true, 530 SubIndent is Indent + 2, 531 ( rdf_equal(RdfType, rdf:type), 532 rdf_equal(ListClass, rdf:'List'), 533 memberchk(rdf(_, RdfType, ListClass), ValueTriples) 534 -> format(Out, ' rdf:parseType="Collection">~n', []), 535 rdf_save_list(ValueTriples, Out, Value, NodeIDs, DefNS, SubIndent, Anon) 536 ; format(Out, '>~n', []), 537 rdf_write_subject(ValueTriples, Out, Value, NodeIDs, DefNS, SubIndent, Anon) 538 ), 539 format(Out, '~N~*|</', [Indent]), 540 rdf_write_id(Out, NameText), 541 format(Out, '>~n', []) 542 ; get_assoc(Value, NodeIDs, NodeID) 543 -> format(Out, ' rdf:nodeID="~w"/>', [NodeID]) 544 ; assertion(fail) 545 ). 546save_attribute(body, rdf(_, Name, Value), DefNS, Out, _, Indent, _Anon) :- 547 stream_property(Out, encoding(Encoding)), 548 rdf_value(Value, QVal, Encoding), 549 rdf_p_id(Name, DefNS, NameText), 550 format(Out, '~N~*|<', [Indent]), 551 rdf_write_id(Out, NameText), 552 format(Out, ' rdf:resource="~w"/>', [QVal]). 553 554save_attribute_value(Value, Out, _) :- % strings 555 atom(Value), 556 !, 557 stream_property(Out, encoding(Encoding)), 558 xml_quote_cdata(Value, QVal, Encoding), 559 write(Out, QVal). 560save_attribute_value(Value, Out, _) :- % numbers 561 number(Value), 562 !, 563 writeq(Out, Value). % quoted: preserve floats 564save_attribute_value(Value, Out, Indent) :- 565 xml_is_dom(Value), 566 !, 567 XMLIndent is Indent+2, 568 xml_write(Out, Value, 569 [ header(false), 570 indent(XMLIndent) 571 ]). 572save_attribute_value(Value, _Out, _) :- 573 throw(error(save_attribute_value(Value), _)). 574 575rdf_save_list(_, _, List, _, _, _, _) :- 576 rdf_equal(List, rdf:nil), 577 !. 578rdf_save_list(ListTriples, Out, List, NodeIDs, DefNS, Indent, Anon) :- 579 rdf_equal(RdfFirst, rdf:first), 580 memberchk(rdf(List, RdfFirst, First), ListTriples), 581 ( rdf_is_bnode(First), 582 memberchk(anon(First, true, FirstTriples), Anon) 583 -> nl(Out), 584 rdf_write_subject(FirstTriples, Out, First, NodeIDs, DefNS, Indent, Anon) 585 ; stream_property(Out, encoding(Encoding)), 586 rdf_value(First, QVal, Encoding), 587 format(Out, '~N~*|<rdf:Description about="~w"/>', 588 [Indent, QVal]) 589 ), 590 ( rdf_equal(RdfRest, rdf:rest), 591 memberchk(rdf(List, RdfRest, List2), ListTriples), 592 \+ rdf_equal(List2, rdf:nil), 593 memberchk(anon(List2, true, List2Triples), Anon) 594 -> rdf_save_list(List2Triples, Out, List2, NodeIDs, DefNS, Indent, Anon) 595 ; true 596 ).
604rdf_p_id(LI, _, 'rdf:li') :- 605 is_bag_li_predicate(LI), 606 !. 607rdf_p_id(Resource, DefNS, NSLocal) :- 608 rdf_id(Resource, DefNS, NSLocal).
615is_bag_li_predicate(Pred) :-
616 atom_concat('_:', AN, Pred),
617 catch(atom_number(AN, N), _, true), integer(N), N >= 0,
618 !.
626rdf_id(Id, NS, NS:Local) :- 627 ns(NS, Full), 628 Full \== '', 629 atom_concat(Full, Local, Id), 630 xml_name(Local), 631 !. 632rdf_id(Id, _, NS:Local) :- 633 ns(NS, Full), 634 Full \== '', 635 atom_concat(Full, Local, Id), 636 xml_name(Local), 637 !. 638rdf_id(Id, _, Id).
646rdf_write_id(Out, NS:Local) :- 647 !, 648 format(Out, '~w:~w', [NS, Local]). 649rdf_write_id(Out, Atom) :- 650 write(Out, Atom).
655rdf_att_id(Id, _, NS:Local) :- 656 ns(NS, Full), 657 Full \== '', 658 atom_concat(Full, Local, Id), 659 xml_name(Local), 660 !. 661rdf_att_id(Id, _, Id).
NOTE: the to_be_described/1 trick ensures entity rewrite in resources that start with 'http://t-d-b.org?'. This is a of a hack to save the artchive data in the MultimediaN project. We should use a more general mechanism.
676rdf_value(V, Text, Encoding) :- 677 to_be_described(Prefix), 678 atom_concat(Prefix, V1, V), 679 ns(NS, Full), 680 atom_concat(Full, Local, V1), 681 !, 682 xml_quote_attribute(Local, QLocal, Encoding), 683 atomic_list_concat([Prefix, '&', NS, (';'), QLocal], Text). 684rdf_value(V, Text, Encoding) :- 685 ns(NS, Full), 686 atom_concat(Full, Local, V), 687 !, 688 xml_quote_attribute(Local, QLocal, Encoding), 689 atomic_list_concat(['&', NS, (';'), QLocal], Text). 690rdf_value(V, Q, Encoding) :- 691 xml_quote_attribute(V, Q, Encoding). 692 693to_be_described('http://t-d-b.org?'). 694 695 696 /******************************* 697 * UTIL * 698 *******************************/ 699 700ns(Id, Full) :- 701 rdf_db:ns(Id, Full)
Write RDF/XML from a list of triples
This module writes an RDF/XML document from a list of triples of the format
rdf(Subject, Predicate, Object)
. It is primarily intended for communicating computed RDF model fragments to external programs using RDF/XML.When used from the HTTP library, use the following code: