From 4f9d15e4f3c7185bd2bb34d304e29882fcc42835 Mon Sep 17 00:00:00 2001 From: Farid Saud <112973190+fsaudm@users.noreply.github.com> Date: Tue, 18 Feb 2025 02:06:53 -0600 Subject: [PATCH 1/2] fix(flatten_authors): fallback to 'last_known_institutions' for institution id extraction --- flatten-openalex-jsonl.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/flatten-openalex-jsonl.py b/flatten-openalex-jsonl.py index b7a039b..429687e 100644 --- a/flatten-openalex-jsonl.py +++ b/flatten-openalex-jsonl.py @@ -292,8 +292,9 @@ def flatten_authors(): author.get('display_name_alternatives'), ensure_ascii=False) author['last_known_institution'] = ( - author.get('last_known_institution') or {}).get( - 'id') + author.get('last_known_institutions') or author.get('last_known_institution') or {} + ).get('id') + authors_writer.writerow(author) # ids From 3f92c72abb196a76b1dfe8f8a68d239d8aad8eff Mon Sep 17 00:00:00 2001 From: Farid Saud <112973190+fsaudm@users.noreply.github.com> Date: Tue, 18 Feb 2025 02:32:59 -0600 Subject: [PATCH 2/2] fix(flatten_authors): fallback to 'last_known_institutions' for institution id extraction --- flatten-openalex-jsonl.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/flatten-openalex-jsonl.py b/flatten-openalex-jsonl.py index 429687e..d43f468 100644 --- a/flatten-openalex-jsonl.py +++ b/flatten-openalex-jsonl.py @@ -292,8 +292,11 @@ def flatten_authors(): author.get('display_name_alternatives'), ensure_ascii=False) author['last_known_institution'] = ( - author.get('last_known_institutions') or author.get('last_known_institution') or {} - ).get('id') + (author.get('last_known_institutions')[0] + if isinstance(author.get('last_known_institutions'), list) + and author.get('last_known_institutions') + else author.get('last_known_institution')) or {} + ).get('id') authors_writer.writerow(author)