From 299514792a3d36fd9dd78c9ba689d5ccd2047b10 Mon Sep 17 00:00:00 2001
From: dikshaa2909 <dikshadeware@gmail.com>
Date: Sat, 14 Feb 2026 00:35:33 +0530
Subject: [PATCH 1/4] Ignore copyright symbols inside URLs

Signed-off-by: dikshaa2909 <dikshadeware@gmail.com>
---
 src/cluecode/copyrights.py              | 12 +++++++++---
 tests/cluecode/test_copyrights_basic.py | 15 ++++++++++++++-
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/src/cluecode/copyrights.py b/src/cluecode/copyrights.py
index 6d17467acf..b4c9d34bf1 100644
--- a/src/cluecode/copyrights.py
+++ b/src/cluecode/copyrights.py
@@ -4281,10 +4281,14 @@ def is_candidate(prepared_line):
     if not prepared_line:
         return False
 
+    # Ignore copyright symbols inside URLs
+    lowered = prepared_line.lower()
+    if '(c)' in lowered and ('http://' in lowered or 'https://' in lowered):
+        return False
+
     if is_only_digit_and_punct(prepared_line):
         if TRACE:
             logger_debug(f'is_candidate: is_only_digit_and_punct:\n{prepared_line!r}')
-
         return False
 
     if gibberish_detector.detect_gibberish(prepared_line):
@@ -4294,13 +4298,15 @@ def is_candidate(prepared_line):
 
     if copyrights_hint.years(prepared_line):
         return True
-    else:
-        pass
+
     prepared_line = prepared_line.lower()
     for marker in copyrights_hint.statement_markers:
         if marker in prepared_line:
             return True
 
+    return False
+
+
 
 def is_inside_statement(
     chars_only_line,
diff --git a/tests/cluecode/test_copyrights_basic.py b/tests/cluecode/test_copyrights_basic.py
index 1fbafcf548..61387dffee 100644
--- a/tests/cluecode/test_copyrights_basic.py
+++ b/tests/cluecode/test_copyrights_basic.py
@@ -14,7 +14,20 @@
 from commoncode.testcase import FileBasedTesting
 from cluecode import copyrights
 from cluecode.copyrights import prepare_text_line
 from cluecode.copyrights import remove_non_chars
+
+
+def test_copyright_symbol_inside_url_is_ignored():
+    text = "See http://example.com/(c)/path for more information."
+
+    prepped = prepare_text_line(text)
+
+    # sanity check
+    assert '(c)' in prepped
+
+    # URLs containing (c) should NOT be copyright candidates
+    assert not copyrights.is_candidate(prepped)
+
 
 
 class TestTextPreparation(FileBasedTesting):

From 36f05ac36d77a33b3cd983de601a1a8aede55d41 Mon Sep 17 00:00:00 2001
From: dikshaa2909 <dikshadeware@gmail.com>
Date: Sat, 14 Feb 2026 01:35:44 +0530
Subject: [PATCH 2/4] Improve copyright symbol handling in URLs

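Refactor copyright symbol detection to ignore (c) only when it occurs
inside an http(s) URL, instead of on any line that mentions a URL.

NOTE: this revision does not parse on its own: the new block in
is_candidate() is mis-indented and its last line is fused with the tail
of a logger_debug() call. PATCH 4/4 replaces the block.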

Signed-off-by: dikshaa2909 <dikshadeware@gmail.com>
---
 src/cluecode/copyrights.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/cluecode/copyrights.py b/src/cluecode/copyrights.py
index b4c9d34bf1..3b91d08c6e 100644
--- a/src/cluecode/copyrights.py
+++ b/src/cluecode/copyrights.py
@@ -4281,14 +4281,12 @@ def is_candidate(prepared_line):
     if not prepared_line:
         return False
 
-    # Ignore copyright symbols inside URLs
-    lowered = prepared_line.lower()
-    if '(c)' in lowered and ('http://' in lowered or 'https://' in lowered):
-        return False
-
-    if is_only_digit_and_punct(prepared_line):
-        if TRACE:
-            logger_debug(f'is_candidate: is_only_digit_and_punct:\n{prepared_line!r}')
+   # Ignore (c) only when it appears inside a URL path
+lowered = prepared_line.lower()
+if '(c)' in lowered:
+    for url in re.findall(r'https?://\S+', lowered):
+        if '(c)' in url:
+            return Falseis_candidate: is_only_digit_and_punct:\n{prepared_line!r}')
         return False
 
     if gibberish_detector.detect_gibberish(prepared_line):

From 861555f8002674692f2e22ef89ee0ab2e034e180 Mon Sep 17 00:00:00 2001
From: dikshaa2909 <dikshadeware@gmail.com>
Date: Sat, 14 Feb 2026 01:38:06 +0530
Subject: [PATCH 3/4] Implement test for copyright with URL

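Add a regression test asserting that a genuine copyright statement that
also contains a URL is still reported as a candidate by is_candidate().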

Signed-off-by: dikshaa2909 <dikshadeware@gmail.com>
---
 tests/cluecode/test_copyrights_basic.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/cluecode/test_copyrights_basic.py b/tests/cluecode/test_copyrights_basic.py
index 61387dffee..7f30031788 100644
--- a/tests/cluecode/test_copyrights_basic.py
+++ b/tests/cluecode/test_copyrights_basic.py
@@ -28,6 +28,13 @@ def test_copyright_symbol_inside_url_is_ignored():
     # URLs containing (c) should NOT be copyright candidates
     assert not copyrights.is_candidate(prepped)
 
+
+def test_copyright_with_url_is_still_candidate():
+    text = "Copyright (c) 2000 Foo, http://example.com"
+
+    prepped = prepare_text_line(text)
+
+    assert copyrights.is_candidate(prepped)
 
 
 class TestTextPreparation(FileBasedTesting):

From 06606a9a502b40c87b2a0274fa77c8ecb6f295d6 Mon Sep 17 00:00:00 2001
From: dikshaa2909 <dikshadeware@gmail.com>
Date: Sat, 14 Feb 2026 02:13:00 +0530
Subject: [PATCH 4/4] Ignore (c) symbols inside URLs during copyright detection

Signed-off-by: dikshaa2909 <dikshadeware@gmail.com>
---
 src/cluecode/copyrights.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/cluecode/copyrights.py b/src/cluecode/copyrights.py
index 3b91d08c6e..ab591dee8d 100644
--- a/src/cluecode/copyrights.py
+++ b/src/cluecode/copyrights.py
@@ -4281,31 +4281,35 @@ def is_candidate(prepared_line):
     if not prepared_line:
         return False
 
-   # Ignore (c) only when it appears inside a URL path
-lowered = prepared_line.lower()
-if '(c)' in lowered:
-    for url in re.findall(r'https?://\S+', lowered):
-        if '(c)' in url:
-            return Falseis_candidate: is_only_digit_and_punct:\n{prepared_line!r}')
+    if is_only_digit_and_punct(prepared_line):
+        if TRACE:
+            logger_debug(f'is_candidate: is_only_digit_and_punct:\n{prepared_line!r}')
         return False
 
     if gibberish_detector.detect_gibberish(prepared_line):
         if TRACE:
             logger_debug(f'is_candidate: gibberish_detector.detect_gibberish:\n{prepared_line!r}')
         return False
 
     if copyrights_hint.years(prepared_line):
         return True
 
-    prepared_line = prepared_line.lower()
+    lowered = prepared_line.lower()
+
+    # A "(c)" on a line that also contains "http" is most likely part of a
+    # URL path rather than a copyright sign, so it does not count as a
+    # marker on its own; a year or any other marker still qualifies the line.
+    skip_c_marker = '(c)' in lowered and 'http' in lowered
+
     for marker in copyrights_hint.statement_markers:
-        if marker in prepared_line:
+        if skip_c_marker and marker == '(c)':
+            continue
+        if marker in lowered:
             return True
 
     return False
 
 
-
 def is_inside_statement(
     chars_only_line,
     markers=('copyright', 'copyrights', 'copyrightby',) + copyrights_hint.all_years,