Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ optional arguments:
Title to use for graph. If not supplied, the repo URI
will be used if graphing an endpoint, or 'Gist' if
graphing local files.
--show-bnode-subjects Use triples with blank nodes in the subject to generate
the graphic.

Sampling Limits:
--instance-limit INSTANCE_LIMIT
Expand Down
2 changes: 2 additions & 0 deletions onto_tool/command_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,8 @@ def define_graphic_parser(subparsers):
" graphing an endpoint, or 'Gist' if graphing local files.")
graphic_parser.add_argument('ontology', nargs="*", default=[],
help="Ontology file, directory or name pattern")
graphic_parser.add_argument("--show-bnode-subjects", action="store_true",
help="Include blank node subjects when generating a graph.")


def define_export_parser(subparsers):
Expand Down
5 changes: 4 additions & 1 deletion onto_tool/onto_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ def generate_graphic(action, onto_files, endpoint, **kwargs):
Read cached query results
save_cache: TextIOWrapper
Save query results as JSON to use with --cache
show_bnode_subjects: boolean
        If true, triples with blank nodes in the subject will not be filtered out.

Returns
-------
Expand Down Expand Up @@ -308,7 +310,8 @@ def main(arguments):
exclude_pattern=args.exclude_pattern,
show_shacl=args.show_shacl,
cache=args.cache,
save_cache=args.save_cache)
save_cache=args.save_cache,
show_bnode_subjects=args.show_bnode_subjects)
return

of = 'pretty-xml' if args.format == 'xml' else args.format
Expand Down
37 changes: 24 additions & 13 deletions onto_tool/ontograph.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def __init__(self, files, repo=None, **kwargs):

self.show_shacl = kwargs.get('show_shacl')
self.shapes = defaultdict(list)
self.show_bnode_subjects = kwargs.get('show_bnode_subjects')

def __configure_data_source(self, repo, kwargs, title, version):
"""Determine graph title and output location from data source."""
Expand Down Expand Up @@ -172,7 +173,7 @@ def gather_schema_info_from_files(self):
ontology = next(graph.subjects(RDF.type, OWL.Ontology))
ontology_name = self.__strip_uri(ontology)
classes = [self.__strip_uri(c) for c in graph.subjects(RDF.type, OWL.Class)
if not isinstance(c, BNode)]
if not isinstance(c, BNode) or self.show_bnode_subjects]
obj_props = [self.__strip_uri(c)
for c in graph.subjects(RDF.type, OWL.ObjectProperty)]
data_props = [self.__strip_uri(c)
Expand All @@ -182,8 +183,8 @@ def gather_schema_info_from_files(self):
all_seen = set(classes + obj_props + data_props + annotation_props)
gist_things = [
self.__strip_uri(s) for (s, o) in graph.subject_objects(RDF.type)
if not isinstance(s, BNode) and not s == ontology and not self.__strip_uri(s)
in all_seen]
if (not isinstance(s, BNode) or self.show_bnode_subjects) and
not s == ontology and not self.__strip_uri(s) in all_seen]
imports = [self.__strip_uri(c)
for c in graph.objects(ontology, OWL.imports)]

Expand All @@ -202,6 +203,7 @@ def gather_schema_info_from_files(self):
def gather_schema_info_from_repo(self):
"""Load schema data from SPARQL endpoint."""
onto_data = defaultdict(lambda: defaultdict(list))
bnode_filter = "filter(!ISBLANK(?entity))" if not self.show_bnode_subjects else ""
if self.single_graph:
onto_query = """
prefix owl: <http://www.w3.org/2002/07/owl#>
Expand All @@ -221,7 +223,7 @@ def gather_schema_info_from_repo(self):
{
?entity a ?type .
FILTER(?type != owl:Ontology)
filter(!ISBLANK(?entity))
$bnode_filter
}
}
}
Expand All @@ -243,7 +245,7 @@ def gather_schema_info_from_repo(self):
UNION
{
?entity rdfs:isDefinedBy ?ontology; a ?type .
filter(!ISBLANK(?entity))
$bnode_filter
}
}
"""
Expand All @@ -254,7 +256,8 @@ def gather_schema_info_from_repo(self):
str(OWL.AnnotationProperty): 'annotation_propertiesList',
str(OWL.imports): 'imports'
}
for entity in self.__remote_select_query(onto_query):
for entity in self.__remote_select_query(
Template(onto_query).substitute(bnode_filter=bnode_filter)):
key = mapping.get(entity['type'], 'gist_thingsList')
onto_data[entity['ontology']][key].append(
self.__strip_uri(entity['entity']))
Expand Down Expand Up @@ -481,6 +484,8 @@ def gather_instance_info(self):
json.dump(self.cached_data, self.save_cache)

def __build_class_hierarchy(self):
bnode_filter = "filter (!isblank(?class) && !isblank(?parent))\n" \
if not self.show_bnode_subjects else ""
inheritance_query = Template("""
prefix owl: <http://www.w3.org/2002/07/owl#>
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
Expand All @@ -498,7 +503,7 @@ def __build_class_hierarchy(self):
rdf:rest*/rdf:first ?parent .
?parent a owl:Class
}
filter (!isblank(?class) && !isblank(?parent))
$bnode_filter
OPTIONAL {
?class rdfs:label|skos:prefLabel ?c_label
FILTER(lang(?c_label) = '$language' || lang(?c_label) = '')
Expand All @@ -508,7 +513,7 @@ def __build_class_hierarchy(self):
FILTER(lang(?p_label) = '$language' || lang(?p_label) = '')
}
}
""").substitute(language=self.label_lang)
""").substitute(bnode_filter=bnode_filter, language=self.label_lang)
parents = self.__select_query(inheritance_query, 'parents')

for inheritance_info in parents:
Expand Down Expand Up @@ -634,6 +639,7 @@ def __add_shacl_coloring(self):
self.shapes[row['class']].append(row['property'])

def __create_class_count_query(self, limit):
bnode_filter = "FILTER(!ISBLANK(?s))" if not self.show_bnode_subjects else ""
class_query = """
prefix owl: <http://www.w3.org/2002/07/owl#>
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
Expand All @@ -646,18 +652,20 @@ def __create_class_count_query(self, limit):
{
select (group_concat(?o;separator=' ') as ?src) where {
$pattern
FILTER(!ISBLANK(?s))
$bnode_filter
FILTER (!STRSTARTS(STR(?o), 'http://www.w3.org/2002/07/owl#'))
} group by ?s LIMIT $limit
}
} group by ?src
"""
query_text = Template(class_query).substitute(
pattern=self.__filtered_graph_pattern(str(RDF.type)),
bnode_filter=bnode_filter,
limit=limit)
return query_text

def __create_predicate_query(self, predicate, predicate_type, limit):
bnode_filter = "FILTER(!ISBLANK(?s))" if not self.show_bnode_subjects else ""
if predicate_type == str(OWL.ObjectProperty):
type_query = """
prefix owl: <http://www.w3.org/2002/07/owl#>
Expand All @@ -674,7 +682,7 @@ def __create_predicate_query(self, predicate, predicate_type, limit):
(group_concat(?tgt_c;separator=' ') as ?tgt)
where {
$pattern
FILTER(!ISBLANK(?s))
$bnode_filter
?s a ?src_c .
FILTER (!STRSTARTS(STR(?src_c), 'http://www.w3.org/2002/07/owl#'))
?o a ?tgt_c .
Expand All @@ -695,7 +703,8 @@ def __create_predicate_query(self, predicate, predicate_type, limit):
{
select (group_concat(?src_c;separator=' ') as ?src) (SAMPLE(COALESCE(?dtype, xsd:string)) as ?dt) where {
$pattern
FILTER(!ISBLANK(?s) && ISLITERAL(?o))
FILTER(ISLITERAL(?o))
$bnode_filter
?s a ?src_c .
FILTER (!STRSTARTS(STR(?src_c), 'http://www.w3.org/2002/07/owl#'))
BIND(DATATYPE(?o) as ?dtype) .
Expand Down Expand Up @@ -724,7 +733,7 @@ def __create_predicate_query(self, predicate, predicate_type, limit):
select (group_concat(?src_c;separator=' ') as ?src)
(group_concat(?tgt_c;separator=' ') as ?tgt) where {
$pattern
FILTER(!ISBLANK(?s))
$bnode_filter
?s a ?src_c .
FILTER (!STRSTARTS(STR(?src_c), 'http://www.w3.org/2002/07/owl#'))
FILTER (!STRSTARTS(STR(?src_c), 'http://www.w3.org/ns/shacl#'))
Expand All @@ -742,7 +751,8 @@ def __create_predicate_query(self, predicate, predicate_type, limit):
select (group_concat(?src_c;separator=' ') as ?src)
(SAMPLE(COALESCE(?dtype, xsd:string)) as ?dt) where {
$pattern
FILTER(!ISBLANK(?s) && ISLITERAL(?o))
FILTER(ISLITERAL(?o))
$bnode_filter
?s a ?src_c .
FILTER (!STRSTARTS(STR(?src_c), 'http://www.w3.org/2002/07/owl#'))
FILTER (!STRSTARTS(STR(?src_c), 'http://www.w3.org/ns/shacl#'))
Expand All @@ -756,6 +766,7 @@ def __create_predicate_query(self, predicate, predicate_type, limit):
"""
query_text = Template(type_query).substitute(
pattern=self.__filtered_graph_pattern(predicate),
bnode_filter=bnode_filter,
limit=limit)
return query_text

Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ isodate>=0.6.0
jsonschema>=3.2.0
attrs>=19.3.0
pyrsistent>=0.16.0
setuptools>=40.8.0
setuptools<58.0.0
namedentities>=1.5.2
zipp>=3.1.0
pytest~=6.1.2
SPARQLWrapper~=1.8.5
sparql-endpoint-fixture>=0.5.0
3 changes: 3 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pytest_plugins = [
"sparql_endpoint_fixture.endpoint"
]
3 changes: 0 additions & 3 deletions tests/bundle/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +0,0 @@
import os
if not os.path.isdir('tests-output/bundle'):
os.makedirs('tests-output/bundle')
16 changes: 8 additions & 8 deletions tests/bundle/test_markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,28 @@
from os.path import basename


def test_markdown_table():
def test_markdown_table(tmp_path):
onto_tool.main([
'bundle', '-v', 'output', 'tests-output/bundle',
'bundle', '-v', 'output', f'{tmp_path}',
'tests/bundle/markdown.yaml'
])

html_text = open('tests-output/bundle/Table.html').read()
html_text = open(f'{tmp_path}/Table.html').read()
assert '<table' in html_text.lower()


def test_markdown_bulk():
def test_markdown_bulk(tmp_path):
onto_tool.main([
'bundle', '-v', 'output', 'tests-output/bundle',
'bundle', '-v', 'output', f'{tmp_path}',
'tests/bundle/bulk_md.yaml'
])

inc_no_exc = [basename(f) for f in glob('tests-output/bundle/bulk_md/inc_no_exc/*')]
inc_no_exc = [basename(f) for f in glob(f'{tmp_path}/bulk_md/inc_no_exc/*')]
assert sorted(inc_no_exc) == ['a1.html']

exc_no_inc = [basename(f) for f in glob('tests-output/bundle/bulk_md/exc_no_inc/*')]
exc_no_inc = [basename(f) for f in glob(f'{tmp_path}/bulk_md/exc_no_inc/*')]
assert sorted(exc_no_inc) == ['a1.html', 'c3.html']

inc_and_exc = [basename(f) for f in glob('tests-output/bundle/bulk_md/inc_and_exc/*')]
inc_and_exc = [basename(f) for f in glob(f'{tmp_path}/bulk_md/inc_and_exc/*')]
assert sorted(inc_and_exc) == ['b2.html', 'c3.html']

4 changes: 2 additions & 2 deletions tests/bundle/test_message.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
from onto_tool import onto_tool


def test_action_message(caplog):
def test_action_message(caplog, tmp_path):
caplog.set_level(logging.INFO)
onto_tool.main([
'bundle', '-v', 'output', 'tests-output/bundle', 'tests/bundle/message.yaml'
'bundle', '-v', 'output', f'{tmp_path}', 'tests/bundle/message.yaml'
])

logs = caplog.text
Expand Down
30 changes: 15 additions & 15 deletions tests/bundle/test_sparql.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ def lists_equal(list_one, list_two):
return len(list_one) == len(list_two) and sorted(list_one) == sorted(list_two)


def test_sparql_queries():
def test_sparql_queries(tmp_path):
onto_tool.main([
'bundle', '-v', 'output', 'tests-output/bundle', 'tests/bundle/sparql.yaml'
'bundle', '-v', 'output', f'{tmp_path}', 'tests/bundle/sparql.yaml'
])
with open('tests-output/bundle/sparql.csv') as csvfile:
with open(f'{tmp_path}/sparql.csv') as csvfile:
actual = list(row for row in csv.DictReader(csvfile))
expected = [
{'s': 'https://data.clientX.com/d/topOntology',
Expand All @@ -25,12 +25,12 @@ def test_sparql_queries():
assert actual == expected


def test_sparql_updates():
def test_sparql_updates(tmp_path):
onto_tool.main([
'-k', 'bundle', '-v', 'output', 'tests-output/bundle/endpoint_sparql', 'tests/bundle/sparql_update.yaml'
'-k', 'bundle', '-v', 'output', f'{tmp_path}', 'tests/bundle/sparql_update.yaml'
])

with open('tests-output/bundle/endpoint_sparql/sparql_update_select.csv') as csvfile:
with open(f'{tmp_path}/sparql_update_select.csv') as csvfile:
actual = list(row for row in csv.DictReader(csvfile))
expected = [
{'person': 'http://example.com/John',
Expand All @@ -41,47 +41,47 @@ def test_sparql_updates():
assert actual == expected

constructed_graph = Graph()
constructed_graph.parse('tests-output/bundle/endpoint_sparql/sparql_update_construct.xml', format='xml')
constructed_graph.parse(f'{tmp_path}/sparql_update_construct.xml', format='xml')
labels = list(constructed_graph.subject_objects(SKOS.prefLabel))
assert lists_equal([(URIRef('http://example.com/John'), Literal('John Johnson')),
(URIRef('http://example.com/Jane'), Literal('Jane Johnson'))],
labels)


def test_each_file():
def test_each_file(tmp_path):
onto_tool.main([
'bundle', '-v', 'output', 'tests-output/bundle', 'tests/bundle/sparql-each.yaml'
'bundle', '-v', 'output', f'{tmp_path}', 'tests/bundle/sparql-each.yaml'
])

# Verify CONSTRUCT
constructed_graph = Graph()
constructed_graph.parse('tests-output/bundle/each/construct/upper_ontology.ttl', format='turtle')
constructed_graph.parse(f'{tmp_path}/each/construct/upper_ontology.ttl', format='turtle')
labels = list(constructed_graph.subject_objects(SKOS.prefLabel))
assert len(labels) == 5
constructed_graph = Graph()
constructed_graph.parse('tests-output/bundle/each/construct/domain_ontology.ttl', format='turtle')
constructed_graph.parse(f'{tmp_path}/each/construct/domain_ontology.ttl', format='turtle')
labels = list(constructed_graph.subject_objects(SKOS.prefLabel))
assert len(labels) == 6

# Verify SELECT
with open('tests-output/bundle/each/select/upper_ontology.csv') as csvfile:
with open(f'{tmp_path}/each/select/upper_ontology.csv') as csvfile:
actual = list(row['label'] for row in csv.DictReader(csvfile))
expected = ["Person", "Upper Ontology", "has phone number", "is friend of", "is private"]
assert actual == expected
with open('tests-output/bundle/each/select/domain_ontology.csv') as csvfile:
with open(f'{tmp_path}/each/select/domain_ontology.csv') as csvfile:
actual = list(row['label'] for row in csv.DictReader(csvfile))
expected = ["Domain Ontology", "School", "Student", "Teacher", "teaches", "works for"]
assert actual == expected

# Verify UPDATE
assert actual == expected
constructed_graph = Graph()
constructed_graph.parse('tests-output/bundle/each/update/upper_ontology.ttl', format='turtle')
constructed_graph.parse(f'{tmp_path}/each/update/upper_ontology.ttl', format='turtle')
labels = list(constructed_graph.subject_objects(SKOS.prefLabel))
assert len(labels) == 5
assert not list(constructed_graph.subject_objects(RDFS.label))
constructed_graph = Graph()
constructed_graph.parse('tests-output/bundle/each/update/domain_ontology.ttl', format='turtle')
constructed_graph.parse(f'{tmp_path}/each/update/domain_ontology.ttl', format='turtle')
labels = list(constructed_graph.subject_objects(SKOS.prefLabel))
assert len(labels) == 6
assert not list(constructed_graph.subject_objects(RDFS.label))
4 changes: 2 additions & 2 deletions tests/bundle/test_syntax_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
import re


def test_syntax_export(caplog):
def test_syntax_export(caplog, tmp_path):
with raises(SystemExit) as wrapped_exit:
onto_tool.main([
'bundle', '-v', 'output', 'tests-output/bundle', 'tests/bundle/syntax_error.yaml'
'bundle', '-v', 'output', f'{tmp_path}', 'tests/bundle/syntax_error.yaml'
])
assert wrapped_exit.type == SystemExit
assert wrapped_exit.value.code == 1
Expand Down
Loading