Skip to content

Commit e450a01

Browse files
authored
Merge pull request #98 from dh-tech/feature/edtf-demo-notebook
EDTF demo/validation notebook
2 parents ee26ed1 + b821923 commit e450a01

File tree

9 files changed

+866
-9
lines changed

9 files changed

+866
-9
lines changed

examples/notebooks/edtf-support.ipynb

Lines changed: 799 additions & 0 deletions
Large diffs are not rendered by default.

src/undate/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,6 @@
11
__version__ = "0.3.0.dev0"
2+
3+
from undate.date import DatePrecision
4+
from undate.undate import Undate, UndateInterval
5+
6+
__all__ = ["Undate", "UndateInterval", "DatePrecision"]

src/undate/converters/edtf/converter.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,14 @@ def _undate_to_string(self, undate: Undate) -> str:
7171
if undate.precision >= DatePrecision.YEAR:
7272
year = self._convert_missing_digits(undate.year, undate.MISSING_DIGIT)
7373
# years with more than 4 digits should be prefixed with Y
74-
if year and len(year) > 4:
75-
year = f"Y{year}"
74+
# (don't count minus sign when checking digits)
75+
if year and len(year.lstrip("-")) > 4:
76+
negative_year = ""
77+
if year.startswith("-"):
78+
negative_year = "-"
79+
year = year[1:]
80+
year = f"{negative_year}Y{year}"
81+
7682
# TODO: handle uncertain / approximate
7783
parts.append(year or EDTF_UNSPECIFIED_DIGIT * 4)
7884

@@ -97,4 +103,4 @@ def _undate_to_string(self, undate: Undate) -> str:
97103
return "-".join(parts)
98104

99105
# how can we have an empty string? probably shouldn't get here
100-
return ""
106+
raise ValueError("Failed to generate an EDTF string from %r", undate)

src/undate/converters/edtf/edtf.lark

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
date: year | year "-" month | year "-" month "-" day
1616

17-
year: INT
17+
year: INT | /\-/ INT
1818
month: /(0[1-9])|(1[0-2])/
1919
day: /([0-2][1-9])|(3[0-1])/
2020

@@ -34,14 +34,15 @@ uncertain_approximate: "%"
3434

3535
// The character 'X' may be used in place of one or more rightmost
3636
// digits to indicate that the value of that digit is unspecified
37+
// In Level 2, year may be completely unspecified.
3738
unspecified: /X/
38-
?year_unspecified: /\d+/ unspecified+
39+
?year_unspecified: /\d+/ unspecified+ | unspecified ~ 4
3940
?month_unspecified: "0".."1"? unspecified ~ 1..2
4041
//?year_month_unspecified: year_l1 "-" month_unspecified
4142
?day_unspecified: "0".."3"? unspecified ~ 1..2
4243

4344
// 'Y' may be used at the beginning of the date string to signify that the date is a year, when (and only when) the year exceeds four digits, i.e. for years later than 9999 or earlier than -9999.
44-
year_fivedigitsplus: /Y\d{5,}/
45+
year_fivedigitsplus: /-?Y\d{5,}/
4546
?year_l1: year_fivedigitsplus | year | year_unspecified
4647

4748
// The values 21, 22, 23, 24 may be used to signify

src/undate/converters/edtf/transformer.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,26 @@ def day_unspecified(self, items):
6464
def date_level1(self, items):
6565
return self.date(items)
6666

67+
def year(self, items):
68+
# when the year is negative, there are two tokens
69+
if len(items) > 1 and items[0] == "-":
70+
# an anonymous token for the - and the integer year
71+
year = items[1]
72+
return Tree(data="year", children=[-year])
73+
74+
return Tree(data="year", children=[items[0]])
75+
6776
def year_fivedigitsplus(self, items):
6877
# strip off the leading Y and convert to integer
6978
token = items[0]
70-
year = int(token.value.lstrip("Y"))
79+
value = token.value
80+
# check if year is negative
81+
negative = False
82+
if value.startswith("-"):
83+
value = value[1:]
84+
negative = True
85+
year = int(value.lstrip("Y"))
86+
87+
if negative:
88+
year = -year
7189
return Tree(data="year", children=[year])

src/undate/undate.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,12 @@ def __init__(
5858
elif year:
5959
self.precision = DatePrecision.YEAR
6060

61-
# TODO: refactor partial date min/max calculations
61+
# special case: treat year = XXXX as unknown/none
62+
if year == "XXXX":
63+
year = None
6264

6365
if year is not None:
66+
# could we / should we use str.isnumeric here?
6467
try:
6568
year = int(year)
6669
# update initial value since it is used to determine
@@ -113,7 +116,7 @@ def __init__(
113116
# if we have no day or partial day, calculate min / max
114117
min_day = 1
115118
# if we know year and month (or max month), calculate exactly
116-
if year and month:
119+
if year and month and isinstance(year, int):
117120
_, max_day = monthrange(int(year), max_month)
118121
elif year is None and month:
119122
# If we don't have year and month,

tests/test_converters/edtf/test_edtf_parser.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,10 @@
1212
"1000-01/2000-05-01",
1313
# level 1
1414
"Y170000002",
15+
"-Y170000002",
1516
"2001-21", # spring 2001
17+
# negative year
18+
"-1985",
1619
# qualifiers
1720
"1984?",
1821
"2004-06~",
@@ -28,6 +31,12 @@
2831
"1985-04/..",
2932
"../1985-04-12",
3033
"/1985-04-12",
34+
# level 2 unspecified digits
35+
"156X-12-25",
36+
"XXXX-12-XX",
37+
"1XXX-12",
38+
"1XXX-XX",
39+
"1984-1X",
3140
]
3241

3342

tests/test_converters/edtf/test_edtf_transformer.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
("1000-01/2000-05-01", UndateInterval(Undate(1000, 1), Undate(2000, 5, 1))),
1515
# level 1
1616
("Y17000002", Undate(17000002)),
17+
("-Y17000002", Undate(-17000002)),
18+
# negative year
19+
("-1985", Undate(-1985)),
1720
# "2001-21", # spring 2001
1821
# qualifiers TODO - not yet supported by undate
1922
# "1984?",
@@ -30,6 +33,11 @@
3033
("1985-04/..", UndateInterval(Undate(1985, 4), None)),
3134
("../1985-04-12", UndateInterval(None, Undate(1985, 4, 12))),
3235
("/1985-04-12", UndateInterval(None, Undate(1985, 4, 12))),
36+
# level 2 unspecified digits
37+
("156X-12-25", Undate("156X", 12, 25)),
38+
("XXXX-12-XX", Undate("XXXX", 12, "XX")),
39+
("1XXX-XX", Undate("1XXX", "XX")),
40+
("1984-1X", Undate(1984, "1X")),
3341
]
3442

3543

tests/test_converters/test_edtf.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import pytest
22
from undate.converters.edtf import EDTFDateConverter
3+
from undate.date import DatePrecision
34
from undate.undate import Undate, UndateInterval
45

56

@@ -52,5 +53,12 @@ def test_to_string(self):
5253

5354
assert EDTFDateConverter().to_string(Undate(1991, "0X")) == "1991-0X"
5455
assert EDTFDateConverter().to_string(Undate(1991, None, 3)) == "1991-XX-03"
56+
assert EDTFDateConverter().to_string(Undate(-1984)) == "-1984"
5557

58+
# if converter can't generate a string for the date,
59+
# it should raise a ValueError
60+
empty_undate = Undate()
61+
empty_undate.precision = DatePrecision.DECADE
62+
with pytest.raises(ValueError):
63+
EDTFDateConverter().to_string(empty_undate)
5664
# TODO: override missing digit and confirm replacement

0 commit comments

Comments
 (0)