4 changes: 2 additions & 2 deletions clearcode/cdutils.py
@@ -34,7 +34,7 @@
from packageurl import PackageURL

"""
ClearlyDefined utlities.
ClearlyDefined utilities.
"""

TRACE_FETCH = False
@@ -532,7 +532,7 @@ def str2coord(s):
segments = s.strip(splitter).split(splitter)
if is_urn or is_url:
segments = segments[1:]
# ignore extra segments for now beyond the 5 fisrt (such as the PR of a curation)
# ignore extra segments for now beyond the 5 first (such as the PR of a curation)
segments = segments[:5]

fields = (
6 changes: 3 additions & 3 deletions clearcode/store_scans.py
@@ -34,8 +34,8 @@

"""
The input is a bunch of scans from ClearlyDefined and
the output is a bunch of git repositories with commited and
pushed scans such that we balance the scans roughly evenly accross
the output is a bunch of git repositories with committed and
pushed scans such that we balance the scans roughly evenly across
different repositories.

The primary reason for multiple repositories is size of a single
@@ -127,7 +127,7 @@ def get_cd_item_by_purl_hash(cd_items):
def add_scancode_scan(repo, purl, scancode_scan):
"""
Save and commit scancode scan for purl to git repo.
Return true if we commited else false
Return true if we committed else false
"""
purl_data_dir = get_or_create_dir_for_purl(purl=purl, repo=repo)
scancode_scan_path = purl_data_dir / "scancode-toolkit-scan.json"
4 changes: 2 additions & 2 deletions clearindex/management/commands/run_clearindex.py
@@ -161,7 +161,7 @@ def handle(self, *args, **options):
def map_definition(cditem):
"""
Map a CD definition. Return the Package created from a mapped CD definition
or None if a Package could not be created or an Exception has occured.
or None if a Package could not be created or an Exception has occurred.
"""
try:
with transaction.atomic():
@@ -328,7 +328,7 @@ def str2coord(s):
segments = s.strip(splitter).split(splitter)
if is_urn or is_url:
segments = segments[1:]
# ignore extra segments for now beyond the 5 fisrt (such as the PR of a curation)
# ignore extra segments for now beyond the 5 first (such as the PR of a curation)
segments = segments[:5]

fields = (
18 changes: 9 additions & 9 deletions docs/source/how-to-guides/deploy_to_devel.rst
@@ -4,7 +4,7 @@
Map deployed code back to source code aka. back2source
=======================================================

In this tutorial we excercise the ScanCode.io pipeline used map the deployed binaries back to the
In this tutorial we exercise the ScanCode.io pipeline used map the deployed binaries back to the
assumed source code of a package, or map source archives to the sources from a version control
system (VCS) checkout.

@@ -59,17 +59,17 @@ Yet these assumption are often proven wrong and the potential for many issues:
<https://en.wikipedia.org/wiki/XZ_Utils_backdoor>_ incident where the source archive of the XZ
Utils packages had been modified to create a malicious SSH backdoor. These cases need to be
detected ideally before the source code is even built. back2source has been detecting the
XZ malicious automake build scripts as requring review, and this using code available before the
XZ malicious automake build scripts as requiring review, and this using code available before the
XZ backdoor issue was known.

- Extra code may be provisioned and routinely injected or complied in the final binary without
- Extra code may be provisioned and routinely injected or compiled in the final binary without
malice.

- For instance, an "UberJAR" is created as a larger Java JAR
<https://en.wikipedia.org/wiki/JAR_(file_format)>_
as the combination of multiple JARS. The other JARs are fetched at built time and not present in
source code form and commonly without metadata to help track their origin. This means that using
package A, means really using unknowningly A, but also B and C. There are license and security
package A, means really using unknowingly A, but also B and C. There are license and security
implications when the license, origin and vulnerability status of B and C goes undetected. Most
tools do not detect these extra package inclusions.

@@ -102,12 +102,12 @@ The ScanCode.io pipeline supports these technologies:

- end-to-end ELF binaries package binary to source analysis. The focus is on on binaries compiled
from C (C++ will be implemented separately in the future as it requires additional demangling of
function signatures). This analysis is based extracting DWARF debug symbols compliation unit
function signatures). This analysis is based extracting DWARF debug symbols compilation unit
references.

- end-to-end Go binary executable to source analysis o binary to source analysis. Note that Go is
special, as while its targets binaries are compiled to ELF, Macho-O and Windows PE/COFF formats,
depending on the operating system target and can also be anlyzed as an ELF for Linux, a Go
- end-to-end Go binary executable to source analysis or binary to source analysis. Note that Go is
special, as while its targets binaries are compiled to ELF, Mach-O and Windows PE/COFF formats,
depending on the operating system target and can also be analyzed as an ELF for Linux, a Go
binary also contains extra information to map source and binaries together through a specific
data structure. This pipeline will be using this data structure (aka. the pclntab).

@@ -450,6 +450,6 @@ Here is how the project creation looks like:
.. image:: images/d2d-images/43a5ff56-fb36-45c7-82bb-8b5256759eee.png


- Inthe resource page, there are also file-level mappings details:
- In the resource page, there are also file-level mappings details:

.. image:: images/d2d-images/4acd087e-0cd1-4361-a8ee-f7af7681c74e.png
2 changes: 1 addition & 1 deletion docs/source/how-to-guides/symbols_and_strings.rst
@@ -76,7 +76,7 @@ Binary analysis
~~~~~~~~~~~~~~~~

Once we have collected symbols and strings from the source code, we can search these in a binary.
The presence of these symbols in the binaries can be used to find the origin of code complied in
The presence of these symbols in the binaries can be used to find the origin of code compiled in
binaries with a lightweight "reverse" engineering process. For instance, a tool like BANG
<https://github.com/armijnhemel/binaryanalysis-ng/>_ can use the source symbols to build
automatons-based search indexes to support an efficient binary origin analysis.
10 changes: 5 additions & 5 deletions docs/source/purldb/rest_api.rst
@@ -133,7 +133,7 @@ An API endpoint that provides the ability to list and get packages.
"parties": [
{
"type": "person",
"role": "developper",
"role": "developer",
"name": "Elastic",
"email": null,
"url": "https://www.elastic.co"
@@ -277,7 +277,7 @@ The package details view returns all information available about a package.
"parties": [
{
"type": "person",
"role": "developper",
"role": "developer",
"name": "Elastic",
"email": null,
"url": "https://www.elastic.co"
@@ -468,7 +468,7 @@ Using cURL to get enhanced package data:
"parties": [
{
"type": "person",
"role": "developper",
"role": "developer",
"name": "Elastic",
"email": null,
"url": "https://www.elastic.co"
@@ -550,7 +550,7 @@ Using cURL to reindex a package:
Filter by checksum
~~~~~~~~~~~~~~~~~~

Take a mapping, where the keys are the names of the checksum algorthm and the
Take a mapping, where the keys are the names of the checksum algorithm and the
values is a list of checksum values and query those values against the
packagedb.

@@ -666,7 +666,7 @@ One action is available on resources:
Filter by checksum
~~~~~~~~~~~~~~~~~~

Take a mapping, where the keys are the names of the checksum algorthm and the
Take a mapping, where the keys are the names of the checksum algorithm and the
values is a list of checksum values and query those values against the
packagedb.

6 changes: 3 additions & 3 deletions docs/source/purldb/symbol_and_string_collection.rst
@@ -11,9 +11,9 @@ pipeline and stores them in the ``extra_data`` field of the resource model.
What are symbols?
------------------

Source code symbols are the names of the functions, methods, classes, varibales and data structures
as found in source code. Another name is "identifiers". Source code iterals (or "strings") are the
string values of variables, such as messages asssigned to a variable or constant in the source code
Source code symbols are the names of the functions, methods, classes, variables and data structures
as found in source code. Another name is "identifiers". Source code literals (or "strings") are the
string values of variables, such as messages assigned to a variable or constant in the source code
of a program.

Why would you want to collect source symbols?
2 changes: 1 addition & 1 deletion etc/ci/macports-ci
@@ -190,7 +190,7 @@ do
# this check confirms that ports were installed
# notice that port -N selfupdate && break is not sufficient as a test
# (sometime it returns a success even though ports have not been installed)
# for some misterious reasons, running without "-d" does not work in some case
# for some mysterious reasons, running without "-d" does not work in some case
sudo port -d -N selfupdate 2>&1 | grep -v DEBUG | awk '{if($1!="x")print}'
port info xdrfile > /dev/null && break || true
sleep 5
2 changes: 1 addition & 1 deletion etc/scripts/fetch_thirdparty.py
@@ -166,7 +166,7 @@ def fetch_thirdparty(
Download the PyPI packages listed in the combination of:
- the pip requirements --requirements REQUIREMENT-FILE(s),
- the pip name==version --specifier SPECIFIER(s)
- any pre-existing wheels or sdsists found in --dest-dir THIRDPARTY_DIR.
- any pre-existing wheels or sdists found in --dest-dir THIRDPARTY_DIR.

Download wheels with the --wheels option for the ``--python-version``
PYVER(s) and ``--operating_system`` OS(s) combinations defaulting to all
2 changes: 1 addition & 1 deletion etc/scripts/gen_pypi_simple.py
@@ -177,7 +177,7 @@ def simple_index_entry(self, base_url):
def build_pypi_index(directory, base_url="https://thirdparty.aboutcode.org/pypi"):
"""
Create the a PyPI simple directory index using a ``directory`` directory of wheels and sdists in
the direvctory at ``directory``/simple/ populated with the proper PyPI simple index directory
the directory at ``directory``/simple/ populated with the proper PyPI simple index directory
structure crafted using symlinks.

WARNING: The ``directory``/simple/ directory is removed if it exists. NOTE: in addition to the a
2 changes: 1 addition & 1 deletion etc/scripts/gen_requirements.py
@@ -15,7 +15,7 @@
"""
Utilities to manage requirements files.
NOTE: this should use ONLY the standard library and not import anything else
because this is used for boostrapping with no requirements installed.
because this is used for bootstrapping with no requirements installed.
"""


2 changes: 1 addition & 1 deletion etc/scripts/gen_requirements_dev.py
@@ -15,7 +15,7 @@
"""
Utilities to manage requirements files.
NOTE: this should use ONLY the standard library and not import anything else
because this is used for boostrapping with no requirements installed.
because this is used for bootstrapping with no requirements installed.
"""


2 changes: 1 addition & 1 deletion etc/scripts/utils_dejacode.py
@@ -86,7 +86,7 @@ def update_with_dejacode_data(distribution):

def update_with_dejacode_about_data(distribution):
"""
Update the Distribution `distribution` wiht ABOUT code data fetched from
Update the Distribution `distribution` with ABOUT code data fetched from
DejaCode. Return True if data was updated.
"""
package_data = get_package_data(distribution)
2 changes: 1 addition & 1 deletion etc/scripts/utils_requirements.py
@@ -14,7 +14,7 @@
"""
Utilities to manage requirements files and call pip.
NOTE: this should use ONLY the standard library and not import anything else
because this is used for boostrapping with no requirements installed.
because this is used for bootstrapping with no requirements installed.
"""


12 changes: 6 additions & 6 deletions etc/scripts/utils_thirdparty.py
@@ -68,15 +68,15 @@
was built for and these tags can be matched to an Environment.

- An Environment is a combination of a Python version and operating system
(e.g., platfiorm and ABI tags.) and is represented by the "tags" it supports.
(e.g., platform and ABI tags.) and is represented by the "tags" it supports.

- A plain LinksRepository which is just a collection of URLs scrape from a web
page such as HTTP diretory listing. It is used either with pip "--find-links"
page such as HTTP directory listing. It is used either with pip "--find-links"
option or to fetch ABOUT and LICENSE files.

- A PypiSimpleRepository is a PyPI "simple" index where a HTML page is listing
package name links. Each such link points to an HTML page listing URLs to all
wheels and sdsist of all versions of this package.
wheels and sdist of all versions of this package.

PypiSimpleRepository and Packages are related through packages name, version and
filenames.
@@ -265,7 +265,7 @@ def download_wheel(name, version, environment, dest_dir=THIRDPARTY_DIR, repos=tu
fetched_wheel_filenames.append(fetched_wheel_filename)

if fetched_wheel_filenames:
# do not futher fetch from other repos if we find in first, typically PyPI
# do not further fetch from other repos if we find in first, typically PyPI
break

return fetched_wheel_filenames
@@ -305,7 +305,7 @@ def download_sdist(name, version, dest_dir=THIRDPARTY_DIR, repos=tuple()):
fetched_sdist_filename = package.sdist.download(dest_dir=dest_dir)

if fetched_sdist_filename:
# do not futher fetch from other repos if we find in first, typically PyPI
# do not further fetch from other repos if we find in first, typically PyPI
break

return fetched_sdist_filename
@@ -1648,7 +1648,7 @@ def _get_package_versions_map(self, name):
self.fetched_package_normalized_names.add(normalized_name)
try:
links = self.fetch_links(normalized_name=normalized_name)
# note that thsi is sorted so the mapping is also sorted
# note that this is sorted so the mapping is also sorted
versions = {
package.version: package
for package in PypiPackage.packages_from_many_paths_or_urls(paths_or_urls=links)
2 changes: 1 addition & 1 deletion matchcode/match.py
@@ -173,7 +173,7 @@ def individual_file_match(codebase):

def approximate_file_match(codebase):
"""
Update Matches from approximatly matched Package files in `codebase`.
Update Matches from approximately matched Package files in `codebase`.

Return the number of approximate matches found in `codebase`.
"""
4 changes: 2 additions & 2 deletions matchcode/models.py
@@ -496,7 +496,7 @@ def match(cls, fingerprints):
# strip positions
only_fings = [hexstring_to_binarray(fing["snippet"]) for fing in fingerprints]

# Step 0: get all fingerprint records that match whith the input
# Step 0: get all fingerprint records that match with the input
matched_fps = cls.objects.filter(fingerprint__in=only_fings)

# Step 1: count Packages whose fingerprints appear
@@ -559,7 +559,7 @@ def match_resources(cls, fingerprints, top=None, **kwargs):

# TODO: track matched package and package resource in ExtendedFileFragmentMatch

# Step 0: get all fingerprint records that match whith the input
# Step 0: get all fingerprint records that match with the input
matched_fps = cls.objects.filter(fingerprint__in=only_fings)

# Step 1: get Resources that show up in the query
4 changes: 2 additions & 2 deletions matchcode/utils.py
@@ -200,7 +200,7 @@ def index_resource_fingerprints(codebase, package):
Return a tuple of integers, `indexed_adci`, `indexed_adsi`, and
`indexed_arci` that represent the number of indexed
ApproximateDirectoryContentIndex, ApproximateDirectoryStructureIndex, and
ApproximateResourceContentIndex created, respectivly.
ApproximateResourceContentIndex created, respectively.
"""
from matchcode.models import ApproximateDirectoryContentIndex
from matchcode.models import ApproximateDirectoryStructureIndex
@@ -252,7 +252,7 @@ def index_package_directories(package):
Return a tuple of integers, `indexed_adci`, `indexed_adsi`, and
`indexed_arci` that represent the number of indexed
ApproximateDirectoryContentIndex, ApproximateDirectoryStructureIndex, and
ApproximateResourceContentIndex created, respectivly.
ApproximateResourceContentIndex created, respectively.

Return 0, 0, 0 if a VirtualCodebase cannot be created from the Resources of
a Package.
2 changes: 1 addition & 1 deletion matchcode_pipeline/pipelines/matching.py
@@ -33,7 +33,7 @@ class Matching(ScanCodebase, LoadInventory):
2. Match archive to Resources
3. Match directory exactly
4. Match files exactly
5. Match directories approximatly
5. Match directories approximately
6. Match files approximately
7. Matching on similar file attributes (path, type, extension, size, Java classpath, etc.)
8. Return only the best matches (We could inject some user input, policies, we could provide a list of purls to guide matching, )
4 changes: 2 additions & 2 deletions matchcode_pipeline/pipes/matching.py
@@ -61,7 +61,7 @@ def get_project_resources_qs(project, resources):
# intend to. For example, if we have matched on the directory with
# the path `foo/bar/1`, using the __startswith filter without
# including a trailing slash on the path would have us get all
# diretories under `foo/bar/` that start with 1, such as
# directories under `foo/bar/` that start with 1, such as
# `foo/bar/10001`, `foo/bar/123`, etc., when we just want `foo/bar/1`
# and its descendants.
path = f"{resource.path}/"
@@ -243,7 +243,7 @@ def match_purldb_directory(project, resource, exact_match=False):
def match_sha1s_to_purldb(project, resources_by_sha1, matcher_func, package_data_by_purldb_urls):
"""
Process `resources_by_sha1` with `matcher_func` and return a 3-tuple
contaning an empty defaultdict(list), the number of matches and the number
containing an empty defaultdict(list), the number of matches and the number
of sha1s sent to purldb.
"""
matched_count = matcher_func(
4 changes: 2 additions & 2 deletions minecode/collectors/debian.py
@@ -56,7 +56,7 @@ def process_request(purl_str, **kwargs):
source_package_url = PackageURL.from_string(source_purl)

except ValueError as e:
error = f"error occured when parsing purl: {purl_str} source_purl: {source_purl} : {e}"
error = f"error occurred when parsing purl: {purl_str} source_purl: {source_purl} : {e}"
return error

has_version = bool(package_url.version)
@@ -75,7 +75,7 @@ def map_debian_package(debian_package, package_content, pipelines, priority=0):
"""
Add a debian `package_url` to the PackageDB.

Return an error string if errors have occured in the process.
Return an error string if errors have occurred in the process.
"""
from minecode.model_utils import add_package_to_scan_queue
from minecode.model_utils import merge_or_create_package
2 changes: 1 addition & 1 deletion minecode/collectors/generic.py
@@ -76,7 +76,7 @@ def process_request(purl_str, **kwargs):
try:
package_url = PackageURL.from_string(purl_str)
except ValueError as e:
error = f"error occured when parsing {purl_str}: {e}"
error = f"error occurred when parsing {purl_str}: {e}"
return error

download_url = package_url.qualifiers.get("download_url")