diff --git a/lib/qa/authorities/linked_data/find_term.rb b/lib/qa/authorities/linked_data/find_term.rb index 03437e23..4fd8b3f3 100644 --- a/lib/qa/authorities/linked_data/find_term.rb +++ b/lib/qa/authorities/linked_data/find_term.rb @@ -15,8 +15,8 @@ def initialize(term_config) @term_config = term_config end - attr_reader :term_config, :full_graph, :filtered_graph, :language, :id, :access_time_s, :normalize_time_s - private :full_graph, :filtered_graph, :language, :id, :access_time_s, :normalize_time_s + attr_reader :term_config, :full_graph, :filtered_graph, :language, :id, :uri, :access_time_s, :normalize_time_s + private :full_graph, :filtered_graph, :language, :id, :uri, :access_time_s, :normalize_time_s delegate :term_subauthority?, :prefixes, :authority_name, to: :term_config @@ -80,6 +80,7 @@ def perform_normalization return full_graph.dump(:jsonld, standard_prefixes: true) if jsonld? filter_graph + extract_uri results = map_results convert_results_to_json(results) end @@ -107,19 +108,42 @@ def map_results ldpath_map: ldpaths_for_term, predicate_map: preds_for_term) end + # special processing for loc ids for backward compatibility def normalize_id return id if expects_uri? - authority_name.to_s.casecmp('loc').zero? ? id.delete(' ') : id + loc? ? id.delete(' ') : id + end + + # special processing for loc ids for backward compatibility + def loc_id + loc_id = URI.unescape(id) + digit_idx = loc_id.index(/\d/) + loc_id.insert(digit_idx, ' ') if loc? && loc_id.index(' ').blank? && digit_idx > 0 + loc_id + end + + # determine if the current authority is LOC which may require special processing of its ids for backward compatibility + def loc? + authority_name.to_s.casecmp('loc').zero? end def expects_uri? term_config.term_id_expects_uri? end - def uri - return @uri if @uri.present? + def extract_uri return @uri = RDF::URI.new(id) if expects_uri? 
- @uri = graph_service.subjects_for_object_value(graph: @filtered_graph, predicate: RDF::URI.new(term_config.term_results_id_predicate), object_value: id.gsub('%20', ' ')).first + @uri = graph_service.subjects_for_object_value(graph: @filtered_graph, predicate: RDF::URI.new(term_config.term_results_id_predicate), object_value: URI.unescape(id)).first + return @uri unless loc? && @uri.blank? + # for backward compatibility, if the loc id as passed in fails to yield a URI, try again after adding a blank to the id + @uri = graph_service.subjects_for_object_value(graph: @filtered_graph, predicate: RDF::URI.new(term_config.term_results_id_predicate), object_value: loc_id).first + if @uri.present? + Qa.deprecation_warning( + in_msg: 'Qa::Authorities::LinkedData::FindTerm', + msg: 'Special processing of LOC ids is deprecated; id should be an exact match of the id in the graph' + ) + end + @uri end def ldpaths_for_term diff --git a/spec/fixtures/lod_loc_second_term_found.rdf.xml b/spec/fixtures/lod_loc_second_term_found.rdf.xml new file mode 100644 index 00000000..3a9a44b9 --- /dev/null +++ b/spec/fixtures/lod_loc_second_term_found.rdf.xml @@ -0,0 +1,68 @@ + + + + More Science + + + More Science + + + + + + More Natural science + + + More Natural science + + + + + + + + More Science of science + + + More Science of science + + + + + + + + More Sciences + + + More Sciences + + + + + sh 1234 + + More Science + + + + More Natural science + + + + + + More Science of science + + + + + + More Sciences + + + More Natural science + More Science of science + More Sciences + + diff --git a/spec/lib/authorities/linked_data/find_term_spec.rb b/spec/lib/authorities/linked_data/find_term_spec.rb index 659a021d..f21e89a6 100644 --- a/spec/lib/authorities/linked_data/find_term_spec.rb +++ b/spec/lib/authorities/linked_data/find_term_spec.rb @@ -95,11 +95,17 @@ context 'in LOC authority' do context 'term found' do - let :results do + before do stub_request(:get, 
'http://id.loc.gov/authorities/subjects/sh85118553') .to_return(status: 200, body: webmock_fixture('lod_loc_term_found.rdf.xml'), headers: { 'Content-Type' => 'application/rdf+xml' }) - lod_loc.find('sh 85118553', subauth: 'subjects') + stub_request(:get, 'http://id.loc.gov/authorities/subjects/sh1234') + .to_return(status: 200, body: webmock_fixture('lod_loc_second_term_found.rdf.xml'), headers: { 'Content-Type' => 'application/rdf+xml' }) end + + let(:results) { lod_loc.find('sh 85118553', subauth: 'subjects') } + let(:second_results) { lod_loc.find('sh 1234', subauth: 'subjects') } + let(:results_without_blank) { lod_loc.find('sh85118553', subauth: 'subjects') } + it 'has correct primary predicate values' do expect(results[:uri]).to eq 'http://id.loc.gov/authorities/subjects/sh85118553' expect(results[:uri]).to be_kind_of String @@ -156,6 +162,19 @@ .to eq ['headings beginning with the word [Scientific;] and subdivision [Science] under ethnic groups and individual wars, e.g. [World War, 1939-1945--Science]'] expect(results['predicates']['http://www.w3.org/2004/02/skos/core#inScheme']).to eq ['http://id.loc.gov/authorities/subjects'] end + + it 'has correct primary predicate values for second request' do + expect(results[:uri]).to eq 'http://id.loc.gov/authorities/subjects/sh85118553' + expect(second_results[:uri]).to eq 'http://id.loc.gov/authorities/subjects/sh1234' + expect(second_results[:uri]).to be_kind_of String + expect(second_results[:id]).to eq 'sh 1234' + expect(second_results[:label]).to eq ['More Science'] + expect(second_results[:altlabel]).to include('More Natural science', 'More Science of science', 'More Sciences') + end + + it 'extracts correct uri when loc id does not have blank' do + expect(results_without_blank[:uri]).to eq 'http://id.loc.gov/authorities/subjects/sh85118553' + end end end