36
37:- module(rdf_litindex,
38 [ rdf_set_literal_index_option/1, 39 rdf_tokenize_literal/2, 40 rdf_find_literal/2, 41 rdf_find_literals/2, 42 rdf_token_expansions/2, 43 rdf_stopgap_token/1, 44
45 rdf_literal_index/2, 46 rdf_delete_literal_index/1 47 ]). 48:- autoload(rdf_db,
49 [ rdf_keys_in_literal_map/3,
50 rdf_find_literal_map/3,
51 rdf_new_literal_map/1,
52 rdf_monitor/2,
53 rdf_current_literal/1,
54 rdf_reset_literal_map/1,
55 rdf_insert_literal_map/4,
56 rdf_delete_literal_map/2,
57 rdf/3,
58 rdf_delete_literal_map/3,
59 rdf_insert_literal_map/3,
60 rdf_statistics_literal_map/2
61 ]). 62:- autoload(library(apply),[maplist/3]). 63:- autoload(library(debug),[debug/3]). 64:- autoload(library(double_metaphone),[double_metaphone/2]). 65:- autoload(library(error),
66 [instantiation_error/1,must_be/2,domain_error/2]). 67:- autoload(library(lists),[member/2,flatten/2,append/3]). 68:- autoload(library(porter_stem),[tokenize_atom/2]). 69:- autoload(library(snowball),[snowball/3]). 70
78
% Mutable state used by the index.  The comments give the argument
% shapes as they are asserted elsewhere in this file.
:- dynamic
    literal_map/2,                  % Type, Map
    map_building/2,                 % Type, Queue
    new_token/2,                    % Token, Lang
    setting/1,                      % Option
    stopgap/1.                      % Token
:- volatile
    literal_map/2.
:- multifile
    tokenization/2,
    exclude_from_index/2.

% Default option values.  set_option/1 replaces the matching fact.
setting(verbose(false)).
setting(index_threads(1)).
setting(index(thread(1))).
setting(stopgap_threshold(50000)).
118
%!  rdf_set_literal_index_option(+Options) is det.
%
%   Set one option or a list of options.  Each option is validated and
%   stored by set_option/1.
%
%   An unbound or partial-list argument is no longer silently unified
%   with `[]`: it is handed to set_option/1, whose validation raises
%   the appropriate error.
%
%   @arg Options is a single option term or a proper list of them.

rdf_set_literal_index_option(Options) :-
    is_list(Options),
    !,
    forall(member(Option, Options),
           set_option(Option)).
rdf_set_literal_index_option(Option) :-
    set_option(Option).
126
%!  set_option(+Option) is det.
%
%   Validate Option and make it the only setting/1 fact with that
%   name and arity.

set_option(Option) :-
    check_option(Option),
    functor(Option, Name, Arity),
    functor(Template, Name, Arity),     % most general term, same name/arity
    retractall(setting(Template)),
    assertz(setting(Option)).
133
%!  check_option(+Option) is det.
%
%   Validate an option term.
%
%   @error instantiation_error if Option is unbound.
%   @error domain_error(literal_option, Option) if the option is
%          unknown.  Type errors come from must_be/2.

check_option(Option) :-
    (   var(Option)
    ->  instantiation_error(Option)
    ;   Option = verbose(Bool)
    ->  must_be(boolean, Bool)
    ;   Option = index_threads(Threads)
    ->  must_be(nonneg, Threads)
    ;   Option = stopgap_threshold(Threshold)
    ->  must_be(nonneg, Threshold)
    ;   Option = index(How)
    ->  must_be(oneof([default,thread(_),self]), How)
    ;   domain_error(literal_option, Option)
    ).
152
153
154 157
187
%!  rdf_find_literal(+Spec, -Literal) is nondet.
%
%   Enumerate on backtracking the literals that rdf_find_literals/2
%   returns for Spec.

rdf_find_literal(Spec, Literal) :-
    rdf_find_literals(Spec, Matches),
    member(Literal, Matches).
191
%!  rdf_find_literals(+Spec, -Literals) is semidet.
%
%   Literals is the sorted, duplicate-free list of literals matching
%   Spec.  Fails if Spec compiles to `@(stopgap)`.

rdf_find_literals(Spec, Literals) :-
    compile_spec(Spec, DNF),
    DNF \== @(stopgap),
    token_index(Map),
    lookup(DNF, Map, _Conditions, PerDisjunct),
    flatten(PerDisjunct, Unsorted),
    sort(Unsorted, Literals).
199
204
%!  rdf_token_expansions(+Spec, -Expansions) is nondet.
%
%   Report which tokens a fuzzy specification expands to.  The first
%   three clauses answer directly from the matching index.  The last
%   clause compiles Spec, looks it up and groups the conditions that
%   matched using join_expansions/2.
%
%   The first three clauses do not cut, so the generic clause is also
%   tried on backtracking.

rdf_token_expansions(prefix(Prefix), [prefix(Prefix, Tokens)]) :-
    token_index(TokenMap),
    rdf_keys_in_literal_map(TokenMap, prefix(Prefix), Tokens).
rdf_token_expansions(sounds(Like), [sounds(Like, Tokens)]) :-
    metaphone_index(SoundMap),
    rdf_find_literal_map(SoundMap, [Like], Tokens).
rdf_token_expansions(stem(Like), [stem(Like, Tokens)]) :-
    stem_index(StemMap),
    rdf_find_literal_map(StemMap, [Like], Tokens).
rdf_token_expansions(Spec, Expansions) :-
    compile_spec(Spec, DNF),
    token_index(TokenMap),
    lookup(DNF, TokenMap, NestedConds, _Literals),
    flatten(NestedConds, Conds),
    sort(Conds, Tagged),
    join_expansions(Tagged, Expansions).
221
%!  join_expansions(+Tagged, -Joined) is semidet.
%
%   Group a sorted list of tagged tokens.  Consecutive elements with
%   the same tag are merged into one term that carries the tag's
%   arguments followed by the list of tokens.

join_expansions([], []).
join_expansions([First|Rest0], [Group|Groups]) :-
    untag(First, Tag, Value),
    Tag =.. TagParts,
    % Values is still unbound here; the next call fills it in.
    append(TagParts, [[Value|Values]], GroupParts),
    Group =.. GroupParts,
    join_expansions_by_tag(Rest0, Tag, Rest, Values),
    join_expansions(Rest, Groups).
230
%!  join_expansions_by_tag(+Tagged, +Tag, -Rest, -Values) is det.
%
%   Take the leading elements of Tagged that carry Tag.  Values gets
%   their tokens and Rest is the remainder of the list.

join_expansions_by_tag([Elem|Elems], Tag, Rest, [Value|Values]) :-
    untag(Elem, Tag, Value),
    !,
    join_expansions_by_tag(Elems, Tag, Rest, Values).
join_expansions_by_tag(Rest, _, Rest, []).
236
%!  lookup(+DNF, +Map, -Conds, -Literals) is det.
%
%   Look up each disjunct of DNF in Map.  Conds and Literals are lists
%   that mirror the `or/2` structure.  Callers flatten them.

lookup(@(false), _, [], []) :- !.
lookup(or(Left, Right), Map, [LeftConds|RightConds], [LeftLits|RightLits]) :-
    !,
    lookup(Left, Map, LeftConds, LeftLits),
    lookup(Right, Map, RightConds, RightLits).
lookup(Conj, Map, [Cond], [Literals]) :-
    lookup1(Conj, Map, Cond, Literals).
244
%!  lookup1(+Conj, +Map, -Cond, -Literals) is det.
%
%   Look up one conjunction.  Cond holds the tagged conditions of Conj
%   when there is a match and `[]` when there is none.  If Conj cannot
%   be turned into a key list (it contains `@(false)`), Literals is
%   `[]` and Cond stays unbound.

lookup1(Conj, Map, Cond, Literals) :-
    phrase(conj_to_list(Conj), Keys),
    !,
    rdf_find_literal_map(Map, Keys, Literals),
    (   Literals == []
    ->  Cond = []
    ;   phrase(conj_to_cond(Conj), Cond)
    ).
lookup1(_, _, _, []).
254
%!  conj_to_list(+Conj)// is semidet.
%
%   Emit the keys of a conjunction.  Tagged tokens contribute their
%   bare token and any other leaf is emitted as is.  Fails if the
%   conjunction contains `@(false)`.

conj_to_list(and(Left, Right)) -->
    !,
    conj_to_list(Left),
    conj_to_list(Right).
conj_to_list(@(false)) -->
    !,
    { fail }.
conj_to_list(Tagged) -->
    { untag(Tagged, Token) },
    !,
    [Token].
conj_to_list(Leaf) -->
    [Leaf].
268
269
%!  conj_to_cond(+Conj)// is det.
%
%   Emit only the tagged leaves of a conjunction.  Untagged leaves
%   contribute nothing.

conj_to_cond(and(Left, Right)) -->
    !,
    conj_to_cond(Left),
    conj_to_cond(Right).
conj_to_cond(Tagged) -->
    { untag(Tagged, _Token) },
    !,
    [Tagged].
conj_to_cond(_) -->
    [].
280
281
285
%!  compile_spec(+Spec, -DNF) is det.
%
%   Compile a query in three steps: expand the fuzzy operators, push
%   negations inward with nnf/2, then convert to disjunctive normal
%   form with dnf/2.

compile_spec(Spec, DNF) :-
    expand_fuzzy(Spec, Expanded),
    nnf(Expanded, Negated),
    dnf(Negated, DNF).
290
291
%!  expand_fuzzy(+Spec, -Expanded) is det.
%
%   Replace the fuzzy operators in Spec by a disjunction of tagged
%   tokens taken from the indexes.
%
%     - sounds(Like), stem(Like), stem(Like, Lang), prefix(Prefix) and
%       case(String) are expanded when their argument is an atom.
%       Otherwise the argument itself is expanded as a specification.
%     - between(Low, High), le(High) and ge(Low) are expanded against
%       the keys of the token index.
%     - or/2 and and/2 are expanded recursively and then simplified.
%     - not/1 is expanded recursively.
%     - An atomic token maps to `@(stopgap)` if it is a stopgap token
%       and to itself otherwise.
%
%   @error instantiation_error if Spec is unbound.
%   @error type_error(boolean_expression, Spec) for any other term.
%          The arguments follow the usual (Type, Culprit) order.

expand_fuzzy(Var, _) :-
    var(Var),
    !,
    instantiation_error(Var).
expand_fuzzy(sounds(Like), Or) :-
    !,
    (   atom(Like)
    ->  metaphone_index(Map),
        double_metaphone(Like, Key),
        rdf_find_literal_map(Map, [Key], Tokens),
        list_to_or(Tokens, sounds(Like), Or)
    ;   expand_fuzzy(Like, Or)
    ).
expand_fuzzy(stem(Like), Or) :-
    !,
    expand_fuzzy(stem(Like, en), Or).       % the default language is `en`
expand_fuzzy(stem(Like, Lang), Or) :-
    !,
    (   atom(Like)
    ->  stem_index(Map),
        stem(Like, Lang, Key),
        rdf_find_literal_map(Map, [Key], Tokens),
        list_to_or(Tokens, stem(Like), Or)
    ;   expand_fuzzy(Like, Or)
    ).
expand_fuzzy(prefix(Prefix), Or) :-
    !,
    (   atom(Prefix)
    ->  expand_token_keys(prefix(Prefix), Or)
    ;   expand_fuzzy(Prefix, Or)
    ).
expand_fuzzy(case(String), Or) :-
    !,
    (   atom(String)
    ->  expand_token_keys(case(String), Or)
    ;   expand_fuzzy(String, Or)
    ).
expand_fuzzy(or(A0, B0), E) :-
    !,
    expand_fuzzy(A0, A),
    expand_fuzzy(B0, B),
    simplify(or(A,B), E).
expand_fuzzy(and(A0, B0), E) :-
    !,
    expand_fuzzy(A0, A),
    expand_fuzzy(B0, B),
    simplify(and(A,B), E).
expand_fuzzy(not(A0), not(A)) :-
    !,
    expand_fuzzy(A0, A).
expand_fuzzy(between(Low, High), Or) :-
    !,
    expand_token_keys(between(Low, High), Or).
expand_fuzzy(le(High), Or) :-
    !,
    expand_token_keys(le(High), Or).
expand_fuzzy(ge(Low), Or) :-
    !,
    expand_token_keys(ge(Low), Or).
expand_fuzzy(Token, Result) :-
    atomic(Token),
    !,
    (   rdf_stopgap_token(Token)
    ->  Result = @(stopgap)
    ;   Result = Token
    ).
expand_fuzzy(Token, _) :-
    throw(error(type_error(boolean_expression, Token), _)).

%   expand_token_keys(+Query, -Or)
%
%   Or is the disjunction of the token-index keys that match Query,
%   each tagged with Query.

expand_token_keys(Query, Or) :-
    token_index(Map),
    rdf_keys_in_literal_map(Map, Query, Tokens),
    list_to_or(Tokens, Query, Or).
370
%!  simplify(+Expr0, -Expr) is det.
%
%   Apply the first matching rewrite rule of simple/2 to the top level
%   of Expr0.  Expr0 is returned unchanged if no rule applies.

simplify(Expr0, Expr) :-
    (   simple(Expr0, Expr)
    ->  true
    ;   Expr = Expr0
    ).

%!  simple(+Expr0, -Expr) is nondet.
%
%   Rewrite rules: `@(false)` absorbs a conjunction and is dropped
%   from a disjunction.  `@(stopgap)` is dropped from both.

simple(and(@(false), _),       @(false)).
simple(and(_, @(false)),       @(false)).
simple(and(@(stopgap), Other), Other).
simple(and(Other, @(stopgap)), Other).
simple(or(@(false), Other),    Other).
simple(or(Other, @(false)),    Other).
simple(or(@(stopgap), Other),  Other).
simple(or(Other, @(stopgap)),  Other).
384
385
%!  list_to_or(+Tokens, +How, -Or) is det.
%
%   Turn a list of tokens into a right-nested `or/2` of tokens tagged
%   with How.  An empty list gives `@(false)` and a single token gives
%   just the tagged token.

list_to_or([], _, @(false)) :- !.
list_to_or([Token], How, Tagged) :-
    !,
    tag(How, Token, Tagged).
list_to_or([Token|Tokens], How, or(Tagged, Rest)) :-
    tag(How, Token, Tagged),
    list_to_or(Tokens, How, Rest).
393
%!  tag(+How, +Token, -Tagged) is semidet.
%
%   Tagged is How with Token added as its last argument.

tag(sounds(Like),       Token, sounds(Like, Token)).
tag(stem(Like),         Token, stem(Like, Token)).
tag(prefix(Prefix),     Token, prefix(Prefix, Token)).
tag(case(String),       Token, case(String, Token)).
tag(between(Low, High), Token, between(Low, High, Token)).
tag(ge(Low),            Token, ge(Low, Token)).
tag(le(High),           Token, le(High, Token)).
401
%!  untag(+Tagged, -Token) is semidet.
%
%   Token is the last argument of a tagged token.  Fails for a term
%   that is not tagged.

untag(sounds(_, Token),     Token).
untag(stem(_, Token),       Token).
untag(prefix(_, Token),     Token).
untag(case(_, Token),       Token).
untag(between(_, _, Token), Token).
untag(le(_, Token),         Token).
untag(ge(_, Token),         Token).
409
%!  untag(+Tagged, -How, -Token) is semidet.
%
%   Split a tagged token into the tag How and the Token.  This is the
%   inverse of tag/3.

untag(sounds(Like, Token),       sounds(Like),       Token).
untag(stem(Like, Token),         stem(Like),         Token).
untag(prefix(Prefix, Token),     prefix(Prefix),     Token).
untag(case(String, Token),       case(String),       Token).
untag(between(Low, High, Token), between(Low, High), Token).
untag(ge(Low, Token),            ge(Low),            Token).
untag(le(High, Token),           le(High),           Token).
417
418
423
%!  nnf(+Formula, -NNF) is det.
%
%   Convert Formula to negation normal form: double negations are
%   removed and negations are pushed through and/2 and or/2 using
%   De Morgan's laws.
%
%   The conversion also descends into positive and/2 and or/2 terms,
%   so a negation nested below a positive connective, as in
%   `and(a, not(not(b)))`, is normalised as well.  Any other term is
%   returned unchanged.

nnf(not(not(A0)), A) :-
    !,
    nnf(A0, A).
nnf(not(and(A0,B0)), or(A,B)) :-
    !,
    nnf(not(A0), A),
    nnf(not(B0), B).
nnf(not(or(A0,B0)), and(A,B)) :-
    !,
    nnf(not(A0), A),
    nnf(not(B0), B).
nnf(and(A0,B0), and(A,B)) :-
    !,
    nnf(A0, A),
    nnf(B0, B).
nnf(or(A0,B0), or(A,B)) :-
    !,
    nnf(A0, A),
    nnf(B0, B).
nnf(A, A).
436
437
441
%!  dnf(+NNF, -DNF) is det.
%
%   Convert a formula to disjunctive normal form.  Both sides of each
%   connective are converted first.  dnf1/2 then distributes a
%   conjunction over any disjunction directly below it.

dnf(or(Left0, Right0), or(Left, Right)) :-
    !,
    dnf(Left0, Left),
    dnf(Right0, Right).
dnf(and(Left0, Right0), DNF):-
    !,
    dnf(Left0, Left),
    dnf(Right0, Right),
    dnf1(and(Left, Right), DNF).
dnf(Leaf, Leaf).
452
%!  dnf1(+Conj, -DNF) is det.
%
%   Distribute a conjunction over a disjunction on either side.  When
%   the disjunction is on the left, the other operand moves to the
%   front of the resulting conjunctions.

dnf1(and(Other, or(Left, Right)), or(L, R)) :-
    !,
    dnf1(and(Other, Left), L),
    dnf1(and(Other, Right), R).
dnf1(and(or(Left, Right), Other), or(L, R)) :-
    !,
    dnf1(and(Other, Left), L),
    dnf1(and(Other, Right), R).
dnf1(Conj, Conj).
462
463
464 467
473
%!  token_index(-Map) is det.
%
%   Get the literal map that indexes tokens.  On first use the map is
%   created and registered, the update monitor is installed and a
%   detached thread starts filling it.  Every call waits until the
%   map is no longer marked as being built.

token_index(Map) :-
    literal_map(token, Map),
    !,
    wait_for_map(token).
token_index(Map) :-
    rdf_new_literal_map(Map),
    assert(literal_map(token, Map)),
    register_token_updater,
    message_queue_create(BuildQueue),
    % wait_for_map/1 blocks on this queue until the builder destroys it.
    assert(map_building(token, BuildQueue)),
    thread_create(make_literal_index(BuildQueue), _,
                  [ alias('__rdf_tokenizer'),
                    detached(true)
                  ]),
    wait_for_map(token).
489
%!  register_token_updater is det.
%
%   Install the rdf_monitor/2 callback that keeps the token index up
%   to date.  With `index(default)` or `index(thread(N))` the update
%   threads are created (one for `default`, N for `thread(N)`) and the
%   callback is thread_monitor_literal/1.  Otherwise the callback is
%   monitor_literal/1.

register_token_updater :-
    Events = [ reset,
               new_literal,
               old_literal
             ],
    (   setting(index(default))
    ->  create_update_literal_thread(1),
        rdf_monitor(thread_monitor_literal, Events)
    ;   setting(index(thread(Count)))
    ->  create_update_literal_thread(Count),
        rdf_monitor(thread_monitor_literal, Events)
    ;   rdf_monitor(monitor_literal, Events)
    ).
503
%!  make_literal_index(+Queue) is det.
%
%   Thread goal that builds the token index.  When building ends, for
%   whatever reason, Queue is destroyed and the `map_building` marker
%   is removed.  That releases the threads in wait_for_map/1.

make_literal_index(Queue) :-
    call_cleanup(
        make_literal_index,
        (   message_queue_destroy(Queue),
            retractall(map_building(token, _))
        )).
509
513
%!  make_literal_index is det.
%
%   Index all current literals.  The thread count is the
%   `index_threads` setting, or the `cpu_count` flag if that setting
%   is absent.

make_literal_index :-
    (   setting(index_threads(Threads))
    ->  true
    ;   current_prolog_flag(cpu_count, Threads)
    ),
    threaded_literal_index(Threads),
    verbose('~N', []).
523
%!  threaded_literal_index(+N) is det.
%
%   Tokenize and register all current literals.  With N > 1, N worker
%   threads read literals from a bounded queue and each gets one
%   `done(true)` message to stop.  The workers are joined and the
%   queue is then destroyed, which was missing before.  With N =< 1
%   the calling thread does the work.

threaded_literal_index(N) :-
    N > 1,
    !,
    message_queue_create(Q, [max_size(1000)]),
    create_index_threads(N, Q, Ids),
    forall(rdf_current_literal(Literal),
           thread_send_message(Q, Literal)),
    forall(between(1, N, _),
           thread_send_message(Q, done(true))),   % one stop message per worker
    maplist(thread_join, Ids, _),
    message_queue_destroy(Q).                     % all workers are done
threaded_literal_index(_) :-
    forall(rdf_current_literal(Literal),
           register_literal(Literal)).
537
%!  create_index_threads(+N, +Queue, -Ids) is det.
%
%   Start N threads running index_worker/1 on Queue.  Ids is the list
%   of their thread identifiers.

create_index_threads(N, Queue, [Id|Ids]) :-
    N > 0,
    !,
    thread_create(index_worker(Queue), Id, []),
    Remaining is N - 1,
    create_index_threads(Remaining, Queue, Ids).
create_index_threads(_, _, []) :- !.
545
%!  index_worker(+Queue) is det.
%
%   Worker loop.  It handles messages from Queue until work/1
%   succeeds, which happens on `done(true)`.

index_worker(Queue) :-
    repeat,
        thread_get_message(Queue, Message),
        work(Message).
550
%!  work(+Message) is semidet.
%
%   Succeeds on `done(true)`.  Any other message is registered as a
%   literal and the call then fails, so the failure-driven loop in
%   index_worker/1 continues.

work(done(true)) :- !.
work(Literal) :-
    register_literal(Literal),
    fail.
555
556
560
%!  clean_token_index is det.
%
%   Reset every registered literal map and forget the stopgap tokens
%   that were learned dynamically.

clean_token_index :-
    forall(literal_map(_Type, Map),
           rdf_reset_literal_map(Map)),
    retractall(stopgap(_)).
565
569
%!  rdf_delete_literal_index(+Type) is semidet.
%
%   Unregister the index of the given Type and reset its map.  Fails
%   if no index of that type is registered.
%
%   @error type_error(atom, Type) via must_be/2.

rdf_delete_literal_index(Type) :-
    must_be(atom, Type),
    once(retract(literal_map(Type, Map))),
    rdf_reset_literal_map(Map).
575
576 579
589
%!  create_update_literal_thread(+Threads) is det.
%
%   Create the bounded queue `rdf_literal_monitor_queue` and start
%   Threads workers with status `initial` to read from it.

create_update_literal_thread(Threads) :-
    message_queue_create(_Queue,
                         [ alias(rdf_literal_monitor_queue),
                           max_size(50000)
                         ]),
    forall(between(1, Threads, _),
           create_index_worker(initial)).
597
% Bookkeeping for the monitor workers: the next worker number and
% the number of `extra` workers that were started.
:- dynamic
    index_worker_id/1,              % NextId
    extra_worker_count/1.           % Count
%!  create_index_worker(+Status) is det.
%
%   Start a monitor thread with alias `rdf_literal_monitor_<N>`.  N is
%   a counter kept in index_worker_id/1 that starts at 1.  Status is
%   `initial` or `extra` and is passed on to monitor_literals/1.

create_index_worker(Status) :-
    (   retract(index_worker_id(Id))
    ->  true
    ;   Id = 1
    ),
    NextId is Id + 1,
    assertz(index_worker_id(NextId)),
    atom_concat(rdf_literal_monitor_, Id, Alias),
    inc_extra_worker_count(Status),
    thread_create(monitor_literals(Status), _,
                  [ alias(Alias)
                  ]).
614
%!  monitor_literals(+Status)
%
%   Thread goal that registers the literals posted on
%   `rdf_literal_monitor_queue`.  An `initial` worker loops forever.
%   An `extra` worker stops once no message arrives within the
%   one-second timeout.  It then lowers the extra-worker count and
%   detaches itself.

monitor_literals(initial) :-
    set_prolog_flag(agc_margin, 0),
    repeat,
        thread_get_message(rdf_literal_monitor_queue, Literal),
        register_literal(Literal),
        fail.
monitor_literals(extra) :-
    set_prolog_flag(agc_margin, 0),
    repeat,
        (   thread_get_message(rdf_literal_monitor_queue, Literal,
                               [ timeout(1)
                               ])
        ->  register_literal(Literal),
            fail                        % back to repeat/0
        ;   !                           % timed out: leave the loop
        ),
    with_mutex(create_index_worker, dec_extra_worker_count),
    thread_self(Self),
    thread_detach(Self).
634
%!  thread_monitor_literal(+Action)
%
%   Monitor callback for threaded updating.  New literals are queued
%   for the monitor workers.  Every other action goes straight to
%   monitor_literal/1.

thread_monitor_literal(new_literal(Literal)) :-
    !,
    thread_send_message(rdf_literal_monitor_queue, Literal).
thread_monitor_literal(Other) :-
    !,
    monitor_literal(Other).
641
646
%!  check_index_workers(+Alias, +Keys) is det.
%
%   Start an extra monitor worker if extra workers are allowed, the
%   queue with the given Alias holds more than 10000 messages and the
%   extra-worker limit has not been reached.  Keys is only used in
%   the debug message.

check_index_workers(Alias, Keys) :-
    max_extra_workers(Limit),
    Limit > 0,
    message_queue_property(Queue, alias(Alias)),
    message_queue_property(Queue, size(Pending)),
    Pending > 10000,
    \+ ( extra_worker_count(Running),
         Running >= Limit
       ),
    !,
    debug(rdf_litindex,
          'Creating extra literal indexer (Queue=~D, Keys=~D)',
          [Pending, Keys]),
    with_mutex(create_index_worker, create_index_worker(extra)).
check_index_workers(_, _).
662
%!  inc_extra_worker_count(+Status) is det.
%
%   Add one to extra_worker_count/1 when Status is `extra`, starting
%   at 1 if no count is stored.  Any other status is ignored.

inc_extra_worker_count(extra) :-
    !,
    (   retract(extra_worker_count(C0))
    ->  C is C0+1
    ;   C = 1
    ),
    asserta(extra_worker_count(C)).
inc_extra_worker_count(_).
671
%!  dec_extra_worker_count is det.
%
%   Subtract one from extra_worker_count/1.  Does nothing if no count
%   is stored.

dec_extra_worker_count :-
    retract(extra_worker_count(C0)),
    !,
    C is C0-1,
    asserta(extra_worker_count(C)).
dec_extra_worker_count.
678
%!  max_extra_workers(-Max) is semidet.
%
%   Max is half the value of the `cpu_count` flag, rounded down.
%   Fails if that flag is not defined.

max_extra_workers(Max) :-
    current_prolog_flag(cpu_count, Count),
    Max is Count//2.
682
683
684 687
%!  monitor_literal(+Action) is semidet.
%
%   Monitor callback that updates the index in the calling thread.
%   While the database is being reset, `old_literal` events are turned
%   off and all maps are cleaned at once.  The events are turned back
%   on when the reset ends.  Fails for any other action.

monitor_literal(new_literal(Lit)) :-
    register_literal(Lit).
monitor_literal(old_literal(Lit)) :-
    unregister_literal(Lit).
monitor_literal(transaction(begin, reset)) :-
    rdf_monitor(monitor_literal, [-old_literal]),
    clean_token_index.
monitor_literal(transaction(end, reset)) :-
    rdf_monitor(monitor_literal, [+old_literal]).
697
701
%!  register_literal(+Literal) is semidet.
%
%   Add Literal to the token index under each of its distinct tokens.
%   A literal that cannot be tokenized is silently skipped.

register_literal(Literal) :-
    (   rdf_tokenize_literal(Literal, RawTokens)
    ->  sort(RawTokens, Tokens),            % also removes duplicates
        text_of(Literal, Lang, Text),
        literal_map(token, Map),
        add_tokens(Tokens, Lang, Text, Map)
    ;   true
    ).
710
%!  add_tokens(+Tokens, +Lang, +Literal, +Map) is det.
%
%   Insert Literal into Map under each token.  The fourth argument of
%   rdf_insert_literal_map/4 decides what happens next.
%
%     - It stays unbound: the number of values stored under the token
%       is compared with the `stopgap_threshold` setting.  Above the
%       threshold the token becomes a stopgap and is removed from the
%       map.
%     - It is bound: the new_token/2 hooks are run.  Progress is
%       reported every 1000 keys and every 10000 keys
%       check_index_workers/2 decides whether an extra worker is
%       needed.

add_tokens([], _, _, _).
add_tokens([Token|Tokens], Lang, Literal, Map) :-
    rdf_insert_literal_map(Map, Token, Literal, KeyCount),
    (   var(KeyCount)
    ->  (   rdf_keys_in_literal_map(Map, key(Token), Values),
            setting(stopgap_threshold(Limit)),
            Values > Limit
        ->  assert(stopgap(Token)),
            rdf_delete_literal_map(Map, Token)
        ;   true
        )
    ;   forall(new_token(Token, Lang), true),
        (   KeyCount mod 1000 =:= 0
        ->  progress(Map, 'Tokens'),
            (   KeyCount mod 10000 =:= 0
            ->  check_index_workers(rdf_literal_monitor_queue, KeyCount)
            ;   true
            )
        ;   true
        )
    ),
    add_tokens(Tokens, Lang, Literal, Map).
733
734
740
%!  unregister_literal(+Literal) is semidet.
%
%   Remove Literal from the token index.  Nothing is removed while a
%   triple with `literal(Text)` as its object still exists.

unregister_literal(Literal) :-
    text_of(Literal, _Lang, Text),
    (   rdf(_, _, literal(Text))
    ->  true
    ;   rdf_tokenize_literal(Literal, RawTokens),
        sort(RawTokens, Tokens),
        literal_map(token, Map),
        del_tokens(Tokens, Text, Map)
    ).
750
%!  del_tokens(+Tokens, +Literal, +Map) is det.
%
%   Remove Literal from Map under each of Tokens.

del_tokens([], _, _).
del_tokens([Token|Tokens], Literal, Map) :-
    rdf_delete_literal_map(Map, Token, Literal),
    del_tokens(Tokens, Literal, Map).
755
756
761
%!  rdf_tokenize_literal(+Literal, -Tokens) is semidet.
%
%   Tokenize a literal.  The multifile hook tokenization/2 is tried
%   first.  Otherwise the text of the literal, which must be an atom,
%   is split with tokenize_atom/2 and filtered by select_tokens/2.

rdf_tokenize_literal(Literal, Tokens) :-
    tokenization(Literal, Tokens),
    !.
rdf_tokenize_literal(Literal, Tokens) :-
    text_of(Literal, _Lang, Text),
    atom(Text),
    tokenize_atom(Text, AllTokens),
    select_tokens(AllTokens, Tokens).
770
%!  select_tokens(+Tokens0, -Tokens) is det.
%
%   Drop the tokens that are not indexed.  A token is dropped if:
%
%     - the exclude_from_index/2 hook rejects it;
%     - it is a number other than an integer in
%       -1073741824..1073741823;
%     - its text has length 1;
%     - it is a default stopgap or a learned stopgap.

select_tokens([], []).
select_tokens([Token|Rest0], Selected) :-
    (   exclude_from_index(token, Token)
    ->  select_tokens(Rest0, Selected)
    ;   number(Token)
    ->  (   integer(Token),
            between(-1073741824, 1073741823, Token)
        ->  Selected = [Token|Rest],
            select_tokens(Rest0, Rest)
        ;   select_tokens(Rest0, Selected)
        )
    ;   atom_length(Token, 1)
    ->  select_tokens(Rest0, Selected)
    ;   default_stopgap(Token)
    ->  select_tokens(Rest0, Selected)
    ;   stopgap(Token)
    ->  select_tokens(Rest0, Selected)
    ;   Selected = [Token|Rest],
        select_tokens(Rest0, Rest)
    ).
791
802
%!  rdf_stopgap_token(?Token) is nondet.
%
%   True if Token is a stopgap token.  With Token unbound all stopgap
%   tokens are enumerated.  With Token bound the test succeeds at
%   most once.

rdf_stopgap_token(Token) :-
    (   var(Token)
    ->  rdf_stopgap_token2(Token)
    ;   once(rdf_stopgap_token2(Token))
    ).
808
%!  rdf_stopgap_token2(?Token) is nondet.
%
%   The four sources of stopgap tokens: the exclude_from_index/2
%   hook, the built-in default_stopgap/1 list, atoms of length 1 and
%   the dynamically learned stopgap/1 facts.

rdf_stopgap_token2(Token) :-
    exclude_from_index(token, Token).
rdf_stopgap_token2(Token) :-
    default_stopgap(Token).
rdf_stopgap_token2(Token) :-
    atom(Token),
    atom_length(Token, 1).
rdf_stopgap_token2(Token) :-
    stopgap(Token).
818
825
%!  default_stopgap(?Token) is nondet.
%
%   Tokens that are never indexed.

default_stopgap(and).
default_stopgap(an).
default_stopgap(or).
default_stopgap(of).
default_stopgap(on).
default_stopgap(in).
default_stopgap(this).
default_stopgap(the).
834
835
843
%!  text_of(+Literal, -Lang, -Text) is semidet.
%
%   Get the text and language of a literal.  `xsd:string` literals and
%   plain atoms are reported as `en`.  Other typed literals and plain
%   integers get the language `-`.

text_of(type(xsd:string, Text), en,   Text) :- !.
text_of(type(_, Text),          -,    Text) :- !.
text_of(lang(Lang, Text),       Lang, Text) :- !.
text_of(Text,                   en,   Text) :- atom(Text), !.
text_of(Text,                   -,    Text) :- integer(Text).
849
850
851 854
860
%!  stem_index(-Map) is det.
%
%   Get the literal map that maps stems to tokens.  On first use the
%   map is created, a new_token/2 hook is installed so that new tokens
%   are stemmed too, and a detached thread starts filling the map.
%   Every call waits until the map is no longer marked as being built.

stem_index(Map) :-
    literal_map(stem, Map),
    !,
    wait_for_map(stem).
stem_index(Map) :-
    rdf_new_literal_map(Map),
    assert(literal_map(stem, Map)),
    assert((new_token(Token, Lang) :- add_stem(Token, Lang, Map))),
    message_queue_create(BuildQueue),
    assert(map_building(stem, BuildQueue)),
    thread_create(fill_stem_index(Map, BuildQueue), _,
                  [ alias('__rdf_stemmer'),
                    detached(true)
                  ]),
    wait_for_map(stem).
876
%!  wait_for_map(+MapName) is det.
%
%   Block while the named map is marked as being built.  The wait is
%   a read on the build queue.  Any exception from that read is
%   ignored, after which the marker is checked again.

wait_for_map(MapName) :-
    (   map_building(MapName, BuildQueue)
    ->  catch(thread_get_message(BuildQueue, _), _, true),
        wait_for_map(MapName)
    ;   true
    ).
883
%!  fill_stem_index(+StemMap, +Queue) is det.
%
%   Thread goal that stems the tokens of all current literals into
%   StemMap.  At the end Queue is destroyed and the `map_building`
%   marker for `stem` is removed.

fill_stem_index(StemMap, Queue) :-
    call_cleanup(
        forall(rdf_current_literal(Literal),
               stem_literal_tokens(Literal, StemMap)),
        (   message_queue_destroy(Queue),
            retractall(map_building(stem, _))
        )).
890
%!  stem_literal_tokens(+Literal, +StemMap) is semidet.
%
%   Add the stems of the distinct tokens of Literal to StemMap.  A
%   literal that cannot be tokenized is skipped.

stem_literal_tokens(Literal, StemMap) :-
    rdf_tokenize_literal(Literal, RawTokens),
    !,
    sort(RawTokens, Tokens),
    text_of(Literal, Lang, _Text),
    insert_tokens_stem(Tokens, Lang, StemMap).
stem_literal_tokens(_, _).
898
%!  insert_tokens_stem(+Tokens, +Lang, +Map) is det.
%
%   Store each atom token in Map under its stem.  Tokens that are not
%   atoms or cannot be stemmed are skipped.  Progress is reported
%   whenever the insert returns a key count that is a multiple of
%   1000.

insert_tokens_stem([], _, _).
insert_tokens_stem([Token|Tokens], Lang, Map) :-
    (   atom(Token),
        stem(Token, Lang, Stem)
    ->  rdf_insert_literal_map(Map, Stem, Token, KeyCount),
        (   integer(KeyCount),
            KeyCount mod 1000 =:= 0
        ->  progress(Map, 'Stem')
        ;   true
        )
    ;   true
    ),
    insert_tokens_stem(Tokens, Lang, Map).
914
915
%!  add_stem(+Token, +Lang, +Map) is semidet.
%
%   Hook body installed as new_token/2 by stem_index/1.  It stores
%   Token in Map under its stem and fails if the token cannot be
%   stemmed.
%
%   stem/3 takes the token first and the language second.  The call
%   used to pass them the other way round.

add_stem(Token, Lang, Map) :-
    stem(Token, Lang, Stem),
    rdf_insert_literal_map(Map, Stem, Token, _).
919
%!  stem(+Token, +LangSpec, -Stem) is semidet.
%
%   Stem is the snowball/3 stem of the lowercased Token for the main
%   language of LangSpec.  Fails if snowball/3 fails or raises an
%   exception.

stem(Token, LangSpec, Stem) :-
    main_lang(LangSpec, Lang),
    downcase_atom(Token, LowerToken),
    catch(snowball(Lang, LowerToken, Stem), _, fail).
924
%!  main_lang(+LangSpec, -Lang) is det.
%
%   Lang is the lowercased main language of LangSpec: the part before
%   the first `-`, or all of LangSpec if it has no `-`.
%
%   The sub-tag branch used to return the prefix without lowercasing
%   it, so `'EN-us'` gave `'EN'` while `'EN'` gave `en`.  Both forms
%   are now lowercased.

main_lang(LangSpec, Lang) :-
    (   sub_atom(LangSpec, Before, _, _, -)     % first `-` only
    ->  sub_atom(LangSpec, 0, Before, _, Primary)
    ;   Primary = LangSpec
    ),
    downcase_atom(Primary, Lang).
931
932
933 936
937
%!  metaphone_index(-Map) is det.
%
%   Get the literal map that maps double-metaphone keys to tokens.  On
%   first use the map is created, a new_token/2 hook is installed for
%   new tokens, and a detached thread starts filling the map.  Every
%   call waits until the map is no longer marked as being built.

metaphone_index(Map) :-
    literal_map(metaphone, Map),
    !,
    wait_for_map(metaphone).
metaphone_index(Map) :-
    rdf_new_literal_map(Map),
    assert(literal_map(metaphone, Map)),
    assert((new_token(Token, Lang) :- add_metaphone(Token, Lang, Map))),
    message_queue_create(BuildQueue),
    assert(map_building(metaphone, BuildQueue)),
    thread_create(fill_metaphone_index(Map, BuildQueue), _,
                  [ alias('__rdf_metaphone_indexer'),
                    detached(true)
                  ]),
    wait_for_map(metaphone).
953
%!  fill_metaphone_index(+MetaphoneMap, +Queue) is det.
%
%   Thread goal that fills MetaphoneMap.  At the end Queue is
%   destroyed and the `map_building` marker for `metaphone` is
%   removed.

fill_metaphone_index(MetaphoneMap, Queue) :-
    call_cleanup(
        fill_metaphone_index(MetaphoneMap),
        (   message_queue_destroy(Queue),
            retractall(map_building(metaphone, _))
        )).
959
%!  fill_metaphone_index(+MetaphoneMap) is det.
%
%   Add every key of the token index to MetaphoneMap.

fill_metaphone_index(MetaphoneMap) :-
    token_index(TokenMap),
    rdf_keys_in_literal_map(TokenMap, all, AllTokens),
    metaphone(AllTokens, MetaphoneMap).
964
%!  metaphone(+Tokens, +Map) is det.
%
%   Store each atom token in Map under its double-metaphone key.
%   Tokens that are not atoms are skipped.  Progress is reported
%   whenever the insert returns a key count that is a multiple of
%   1000.

metaphone([], _).
metaphone([Token|Tokens], Map) :-
    (   atom(Token),
        double_metaphone(Token, Sound)
    ->  rdf_insert_literal_map(Map, Sound, Token, KeyCount),
        (   integer(KeyCount),
            KeyCount mod 1000 =:= 0
        ->  progress(Map, 'Metaphone')
        ;   true
        )
    ;   true
    ),
    metaphone(Tokens, Map).
978
979
%!  add_metaphone(+Token, +Lang, +Map) is det.
%
%   Hook body installed as new_token/2 by metaphone_index/1.  It
%   stores an atom Token in Map under its double-metaphone key and
%   ignores tokens that are not atoms.  Lang is not used.

add_metaphone(Token, _Lang, Map) :-
    atom(Token),
    !,
    double_metaphone(Token, Sound),
    rdf_insert_literal_map(Map, Sound, Token).
add_metaphone(_, _, _).
986
1003
%!  rdf_literal_index(+Type, -Map) is det.
%
%   Get the literal map for Type, which is one of `token`, `stem` or
%   `metaphone`.  The map is created if needed.
%
%   @error domain_error(literal_index, Type) for any other Type.

rdf_literal_index(token, Map) :-
    !,
    token_index(Map).
rdf_literal_index(stem, Map) :-
    !,
    stem_index(Map).
rdf_literal_index(metaphone, Map) :-
    !,
    metaphone_index(Map).
rdf_literal_index(Unknown, _Map) :-
    domain_error(literal_index, Unknown).
1015
1016
1017 1020
%!  verbose(+Format, +Args) is det.
%
%   Print a formatted message on `user_error` if the `verbose` setting
%   is `true`.  Otherwise do nothing.

verbose(Format, Args) :-
    (   setting(verbose(true))
    ->  format(user_error, Format, Args)
    ;   true
    ).
1026
1027progress(Map, Which) :-
1028 setting(verbose(true)),
1029 !,
1030 rdf_statistics_literal_map(Map, size(Keys, Values)),
1031 format(user_error,
1032 '\r~t~w: ~12|Keys: ~t~D~15+; Values: ~t~D~20+',
1033 [Which, Keys, Values]).
1034progress(_,_)