From 3b4f43c842bb226420b53239d4ffa6997ac139da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Szymon=20=C5=81opaciuk?=
Date: Fri, 2 Feb 2018 17:01:18 +0100
Subject: [PATCH] parsers: make JATS parser more relaxed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Szymon Łopaciuk
---
 hepcrawl/parsers/jats.py | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/hepcrawl/parsers/jats.py b/hepcrawl/parsers/jats.py
index 1597b22e..33825867 100644
--- a/hepcrawl/parsers/jats.py
+++ b/hepcrawl/parsers/jats.py
@@ -62,7 +62,8 @@ def parse(self):
             self.builder.add_doi(**doi)
         for keyword in self.keywords:
             self.builder.add_keyword(**keyword)
-        self.builder.add_imprint_date(self.publication_date.dumps())
+        if self.publication_date:
+            self.builder.add_imprint_date(self.publication_date.dumps())
         for reference in self.references:
             self.builder.add_reference(reference)
 
@@ -284,11 +285,10 @@ def publication_date(self):
             './front//pub-date[starts-with(@date-type,"pub")] |'
             './front//date[starts-with(@date-type,"pub")]'
         )
-        publication_date = min(
-            self.get_date(date_node) for date_node in date_nodes
-        )
-
-        return publication_date
+        if date_nodes:
+            return min(
+                self.get_date(date_node) for date_node in date_nodes
+            )
 
     @property
     def publication_info(self):
@@ -366,11 +366,10 @@ def year(self):
             not_online=not_online
         )
 
-        year = min(
-            self.get_date(date_node) for date_node in date_nodes
-        ).year
-
-        return year
+        if date_nodes:
+            return min(
+                self.get_date(date_node) for date_node in date_nodes
+            ).year
 
     def get_author_affiliations(self, author_node):
         """Extract an author's affiliations."""