/*  Part of SWI-Prolog

    Author:        Jan Wielemaker
    E-mail:        J.Wielemaker@vu.nl
    WWW:           http://www.swi-prolog.org
    Copyright (c)  2007-2020, VU University Amsterdam
                              CWI, Amsterdam
    All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

    1. Redistributions of source code must retain the above copyright
       notice, this list of conditions and the following disclaimer.

    2. Redistributions in binary form must reproduce the above copyright
       notice, this list of conditions and the following disclaimer in
       the documentation and/or other materials provided with the
       distribution.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    POSSIBILITY OF SUCH DAMAGE.
*/

:- module(rdf_cache,
          [ rdf_set_cache_options/1,    % +Options
            rdf_cache_file/3            % +URL, +RW, -File
          ]).
:- autoload(library(error),[must_be/2,domain_error/2]).
:- autoload(library(filesex),[make_directory_path/1]).

/** <module> Cache RDF triples

The library library(semweb/rdf_cache) defines the caching strategy for
triples sources. When using large RDF sources, caching triples greatly
speeds up loading RDF documents. The cache library implements two caching
strategies that are controlled by rdf_set_cache_options/1.

*|Local caching|* This approach applies to files only. Triples are
cached in a sub-directory of the directory holding the source. This
directory is called =|.cache|= (=|_cache|= on Windows). If the cache
option =create_local_directory= is =true=, a cache directory is created
if possible.

*|Global caching|* This approach applies to all sources, except for
unnamed streams. Triples are cached in the directory defined by the
cache option =global_directory=.

When loading an RDF file, the system scans the configured cache files
unless cache(false) is specified as option to rdf_load/2 or caching is
disabled. If caching is enabled but no cache exists, the system will try
to create a cache file. First it will try to do this locally. On failure
it will try the configured global cache.
*/

:- dynamic
    cache_option/1.

%   set_setfault_options is det.
%
%   Install the default policy: caching is enabled and the local cache
%   directory is named '_cache' on Windows and '.cache' elsewhere.

set_setfault_options :-
    assert(cache_option(enabled(true))),
    (   current_prolog_flag(windows, true)
    ->  assert(cache_option(local_directory('_cache')))
    ;   assert(cache_option(local_directory('.cache')))
    ).

:- set_setfault_options.                % _only_ when loading!

%!  rdf_set_cache_options(+Options) is det.
%
%   Change the cache policy. Options is either a single option or a
%   list of options. Provided options are:
%
%     * enabled(Boolean)
%       If =true=, caching is enabled.
%
%     * local_directory(Name).
%       Plain name of local directory.  Default =|.cache|=
%       (=|_cache|= on Windows).
%
%     * create_local_directory(Bool)
%       If =true=, try to create local cache directories
%
%     * global_directory(Dir)
%       Writeable directory for storing cached parsed files.
%
%     * create_global_directory(Bool)
%       If =true=, try to create the global cache directory.
%
%   Setting an option replaces any previous value for that option.
%
%   @error instantiation_error if Options (or the tail of the option
%          list) is unbound.
%   @error domain_error(cache_option, Opt) if Opt is not one of the
%          options above with exactly one argument.
%   @error type_error if the option value has the wrong type.

rdf_set_cache_options(Options) :-
    var(Options),
    !,
    % Without this guard an unbound argument would silently unify
    % with [] in the next clause.
    must_be(nonvar, Options).
rdf_set_cache_options([]) :- !.
rdf_set_cache_options([H|T]) :-
    !,
    rdf_set_cache_options(H),
    rdf_set_cache_options(T).
rdf_set_cache_options(Opt) :-
    (   compound(Opt),
        % Only Name(Value) is a legal option; atoms and terms of any
        % other arity are rejected instead of failing or being stored.
        compound_name_arguments(Opt, Name, [Value]),
        cache_option(Name, Type)
    ->  must_be(Type, Value)
    ;   domain_error(cache_option, Opt)
    ),
    functor(Gen, Name, 1),
    retractall(cache_option(Gen)),      % drop the previous setting
    expand_option(Opt, EOpt),
    assert(cache_option(EOpt)).

%   cache_option(?Name, ?Type)
%
%   Table of known option names and the must_be/2 type of their value.

cache_option(enabled,                 boolean).
cache_option(local_directory,         atom).
cache_option(create_local_directory,  boolean).
cache_option(global_directory,        atom).
cache_option(create_global_directory, boolean).

%   expand_option(+Option0, -Option) is det.
%
%   Normalise an option before it is stored. The global directory is
%   turned into an absolute file name; other options are kept as is.

expand_option(global_directory(Local), global_directory(Global)) :-
    !,
    absolute_file_name(Local, Global).
expand_option(Opt, Opt).


%!  rdf_cache_file(+URL, +ReadWrite, -File) is semidet.
%
%   File is the cache file for URL. If ReadWrite is =read=, it
%   returns the name of an existing file. If =write= it returns
%   where a new cache file can be overwritten or created.
%
%   For =|file://|= URLs the local cache directory is tried first. If
%   that yields no accessible file, the global cache directory is
%   tried, provided the =global_directory= option is set. Fails if
%   caching is disabled.

rdf_cache_file(_URL, _, _File) :-
    cache_option(enabled(false)),
    !,
    fail.
rdf_cache_file(URL, read, File) :-
    !,
    (   atom_concat('file://', Path, URL),
        cache_option(local_directory(Local)),
        file_directory_name(Path, Dir),
        local_cache_file(URL, LocalFile),
        atomic_list_concat([Dir, Local, LocalFile], /, File)
    ;   cache_option(global_directory(Dir)),
        url_cache_file(URL, Dir, trp, read, File)
    ),
    % On failure we backtrack into the disjunction above, moving from
    % the local to the global candidate.
    access_file(File, read),
    !.
rdf_cache_file(URL, write, File) :-
    !,
    (   atom_concat('file://', Path, URL),
        cache_option(local_directory(Local)),
        file_directory_name(Path, Dir),
        % Only create the local cache directory when allowed to;
        % otherwise it must already exist.
        (   cache_option(create_local_directory(true))
        ->  RWDir = write
        ;   RWDir = read
        ),
        ensure_dir(Dir, Local, RWDir, CacheDir),
        local_cache_file(URL, LocalFile),
        atomic_list_concat([CacheDir, LocalFile], /, File)
    ;   cache_option(global_directory(Dir)),
        ensure_global_cache(Dir),
        url_cache_file(URL, Dir, trp, write, File)
    ),
    access_file(File, write),
    !.
%   ensure_global_cache(+Dir) is semidet.
%
%   True when the global cache directory Dir exists. A missing
%   directory is created, including parents, only if the cache option
%   create_global_directory(true) is set; an informational message is
%   printed after creating it.

ensure_global_cache(Dir) :-
    exists_directory(Dir),
    !.
ensure_global_cache(Dir) :-
    cache_option(create_global_directory(true)),
    make_directory_path(Dir),
    print_message(informational, rdf(cache_created(Dir))).


                 /*******************************
                 *         LOCAL CACHE          *
                 *******************************/

%!  local_cache_file(+FileURL, -File) is det.
%
%   Return the name of the cache file for FileURL. The name is the
%   plain filename with the .trp extension.  As the URL is a file
%   URL, it is guaranteed to be a valid filename.  Assumes the
%   hosting OS can handle multiple extensions (=|.x.y|=) though.
%   These days that's even true on Windows.

local_cache_file(URL, File) :-
    file_base_name(URL, Name),
    file_name_extension(Name, trp, File).


                 /*******************************
                 *         GLOBAL CACHE         *
                 *******************************/

%!  url_cache_file(+URL, +Dir, +Ext, +RW, -Path) is semidet.
%
%   Determine location of cache-file for the given URL in Dir. If
%   Ext is provided, the returned Path is ensured to have the
%   specified extension.
%
%   The file is placed two directory levels below Dir. The level
%   names are taken from the hexadecimal hash of URL: the last two
%   digits name the first level and the two digits before those name
%   the second level.
%
%   @param RW       If =read=, no directories are created and the call
%                   fails if URL is not in the cache.

url_cache_file(URL, Dir, Ext, RW, Path) :-
    term_hash(URL, Hash0),
    Hash is Hash0 + 100000,         % make sure > 4 characters
    format(string(Hex), '~16r', [Hash]),
    sub_atom(Hex, _, 2, 0, L1),     % last two hex digits
    ensure_dir(Dir, L1, RW, Dir1),
    sub_atom(Hex, _, 2, 2, L2),     % the two digits before those
    ensure_dir(Dir1, L2, RW, Dir2),
    url_to_file(URL, File),
    ensure_ext(File, Ext, FileExt),
    atomic_list_concat([Dir2, /, FileExt], Path).

%   ensure_dir(+Parent, +Sub, +RW, -Dir) is semidet.
%
%   Dir is Parent/Sub. Succeeds if Dir exists. If it does not exist
%   and RW is =write=, try to create it; any error while creating the
%   directory is turned into failure. Fails for a missing directory
%   when RW is not =write=.

ensure_dir(D0, Sub, RW, Dir) :-
    atomic_list_concat([D0, /, Sub], Dir),
    (   exists_directory(Dir)
    ->  true
    ;   RW == write
    ->  catch(make_directory(Dir), _, fail)
    ).

%   ensure_ext(+File, +Ext, -FileExt) is det.
%
%   FileExt is File, guaranteed to carry extension Ext. File is
%   returned unchanged if Ext is '' or File already has extension Ext.

ensure_ext(File, '', File) :- !.
ensure_ext(File, Ext, File) :-
    file_name_extension(_, Ext, File),
    !.
ensure_ext(File, Ext, FileExt) :-
    file_name_extension(File, Ext, FileExt).

%!  url_to_file(+URL, -File)
%
%   Convert a URL in something that fits in a file, i.e. avoiding /
%   and :. We simply replace these by -. We could also use
%   www_form_encode/2, but confusion when to replace as well as the
%   fact that we lose the '.' (extension) makes this a less ideal
%   choice. We could also consider base64 encoding of the name.

url_to_file(URL, File) :-
    atom_codes(URL, Codes),
    phrase(safe_file_name(Codes), FileCodes),
    atom_codes(File, FileCodes).

%   safe_file_name(+Codes)//
%
%   Emit Codes, substituting every code for which replace//1 is
%   defined and copying all other codes unchanged.

safe_file_name([]) -->
    [].
safe_file_name([H|T]) -->
    replace(H),
    !,
    safe_file_name(T).
safe_file_name([H|T]) -->
    [H],
    safe_file_name(T).

%!  replace(+Code)//
%
%   Replace a character code that cannot safely be put in a
%   filename. Should we use %XX?

replace(0'/)  --> "-".                  % directory separator
replace(0'\\) --> "-".                  % not allowed in Windows filename
replace(0':)  --> "-".                  % idem
replace(0'?)  --> "-".                  % idem
replace(0'*)  --> "-".                  % idem


                 /*******************************
                 *           MESSAGES           *
                 *******************************/

:- multifile prolog:message/3.

% Hook into print_message/2: the rule must be defined as
% prolog:message//1 for rdf(cache_created(Dir)) to be rendered.
prolog:message(rdf(cache_created(Dir))) -->
    [ 'Created RDF cache directory ~w'-[Dir] ].