Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 17 additions & 19 deletions src/cluecode/copyrights.py
Original file line number Diff line number Diff line change
Expand Up @@ -4282,36 +4282,34 @@ def is_candidate(prepared_line):
return False

if is_only_digit_and_punct(prepared_line):
if TRACE:
logger_debug(f'is_candidate: is_only_digit_and_punct:\n{prepared_line!r}')

return False

if gibberish_detector.detect_gibberish(prepared_line):
if TRACE:
logger_debug(f'is_candidate: gibberish_detector.detect_gibberish:\n{prepared_line!r}')
logger_debug(
f'is_candidate: gibberish_detector.detect_gibberish:\n{prepared_line!r}'
)
return False

lowered = prepared_line.lower() # ✅ DEFINE ONCE, ALWAYS

# Ignore lines where (c) appears only in URL-like text
if '(c)' in lowered and 'http' in lowered:
if not copyrights_hint.years(prepared_line):
for marker in copyrights_hint.statement_markers:
if marker != '(c)' and marker in lowered:
break
else:
return False

if copyrights_hint.years(prepared_line):
return True
else:
pass
prepared_line = prepared_line.lower()

for marker in copyrights_hint.statement_markers:
if marker in prepared_line:
if marker in lowered:
return True


def is_inside_statement(
    chars_only_line,
    markers=('copyright', 'copyrights', 'copyrightby',) + copyrights_hint.all_years,
):
    """
    Return True if a line ends with some strings that indicate we are still
    inside a statement, False otherwise.

    ``chars_only_line`` is the line text to check; an empty or None value
    yields False. ``markers`` is a tuple of suffixes: the line matches when it
    ends with any one of them.
    """
    # Coerce to a real bool: a bare ``x and x.endswith(...)`` would hand back
    # the falsy operand itself ('' or None) instead of False for empty input.
    return bool(chars_only_line) and chars_only_line.endswith(markers)

return False

def is_end_of_statement(chars_only_line):
"""
Expand Down
21 changes: 20 additions & 1 deletion tests/cluecode/test_copyrights_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,26 @@
from commoncode.testcase import FileBasedTesting
from cluecode import copyrights
from cluecode.copyrights import prepare_text_line
from cluecode.copyrights import remove_non_chars
from cluecode import copyrights
from cluecode.copyrights import prepare_text_line, remove_non_chars

def test_copyright_symbol_inside_url_is_ignored():
    """
    Check that a "(c)" occurring only inside a URL does not make the line a
    copyright candidate.
    """
    prepared = prepare_text_line(
        "See http://example.com/(c)/path for more information."
    )

    # Guard: the "(c)" marker must still be present in the prepared text for
    # the candidate check below to be meaningful.
    assert '(c)' in prepared

    # A URL containing "(c)" alone must not be reported as a candidate.
    candidate = copyrights.is_candidate(prepared)
    assert not candidate

def test_copyright_with_url_is_still_candidate():
    """
    Check that a copyright statement that also carries a URL is still
    reported as a copyright candidate.
    """
    prepared = prepare_text_line("Copyright (c) 2000 Foo, http://example.com")
    assert copyrights.is_candidate(prepared)


class TestTextPreparation(FileBasedTesting):
Expand Down
Loading