diff --git a/.docker/docker-compose.beta.yml b/.docker/docker-compose.beta.yml deleted file mode 100644 index 967bff63..00000000 --- a/.docker/docker-compose.beta.yml +++ /dev/null @@ -1,29 +0,0 @@ -version: '3' -services: - congress_parser_api: - image: beta_congress_parser_api:latest - container_name: beta_congress_parser_api - environment: - - STAGE=prod - - db_host=10.0.0.248:5432 - - db_table=us_code_beta - - CACHE_HEADER_TIME=432000 - - CACHE_TIME=432000 - ports: ["9091:9090"] - congress_viewer_app: - image: beta_congress_viewer_app:latest - container_name: beta_congress_viewer_app - volumes: - - /var/www/congress-beta:/usr/src/app/build - entrypoint: - - "yarn" - command: - - "build" - ports: ["3001:3000"] - congress_postgres: - container_name: beta_congress_postgres - image: tianon/true -networks: - parser: - external: - name: docker_parser diff --git a/.docker/docker-compose.yml b/.docker/docker-compose.yml index b25f1936..e597af05 100644 --- a/.docker/docker-compose.yml +++ b/.docker/docker-compose.yml @@ -41,7 +41,7 @@ services: - db_table=us_code_2025 build: context: ../backend - dockerfile: .docker/Dockerfile.fastapi + dockerfile: .docker/Dockerfile volumes: - ../backend:/usr/src/app/ ports: @@ -50,6 +50,13 @@ services: - congress_postgres networks: parser: + entrypoint: "uvicorn" + command: + - "congress_fastapi.app:app" + - "--host" + - "0.0.0.0" + - "--port" + - "8080" congress_viewer_app: image: congress_viewer_app:latest container_name: congress_viewer_app diff --git a/.docker/nginx.dockerfile b/.docker/nginx.dockerfile deleted file mode 100644 index 331eb590..00000000 --- a/.docker/nginx.dockerfile +++ /dev/null @@ -1,11 +0,0 @@ -FROM debian:jessie - -RUN apt-get update && apt-get install -y nginx \ - ca-certificates \ - gettext-base -RUN ln -sf /dev/stdout /var/log/nginx/access.log \ - && ln -sf /dev/stderr /var/log/nginx/error.log - -EXPOSE 80 443 - -CMD ["/usr/sbin/nginx", "-g", "daemon off;"] diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 9a70ade7..00000000 --- a/.flake8 +++ /dev/null @@ -1,4 +0,0 @@ -[flake8] -ignore = E501,Q000,D103,D100,D101,D102,D107,Q002,D205,D400,E203,E266,W503 -max-line-length = 88 -max-complexity = 18 \ No newline at end of file diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index d5732a4c..a6b7a2a5 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -12,4 +12,4 @@ jobs: - name: Build containers run: cd /home/mustyoshi/Github/congress-dev && docker-compose -f .docker/docker-compose.yml -f .docker/docker-compose.prod.yml build - name: Deploy - run: cd /home/mustyoshi/Github/congress-dev && bash start_prod.sh \ No newline at end of file + run: cd /home/mustyoshi/Github/congress-dev && bash scripts/start_prod.sh \ No newline at end of file diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8585afc9..65df7b98 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -15,4 +15,4 @@ jobs: requirements-frozen.txt install-cmd: cd backend && pip install -r requirements.txt -r requirements-fastapi.txt -r requirements-test.txt && pip install -e . - name: Run tests - run: cd backend && pytest billparser \ No newline at end of file + run: cd backend && pytest congress_parser \ No newline at end of file diff --git a/.gitignore b/.gitignore index 7a66e438..9704ab24 100644 --- a/.gitignore +++ b/.gitignore @@ -1,13 +1,7 @@ -**.pyc -**/node_modules -frontend/yarn-error.log +.DS_Store + .vscode .docker/docker-compose.local.yml -backend/venv/ -venv/ - -.resources/ -*.backup -import.sh - -.DS_Store \ No newline at end of file +.volume +.resources +*.backup \ No newline at end of file diff --git a/.prettierrc b/.prettierrc deleted file mode 100644 index e0d95435..00000000 --- a/.prettierrc +++ /dev/null @@ -1,8 +0,0 @@ -{ - "semi": true, - "singleQuote": false, - "trailingComma": "all", - "bracketSpacing": true, - "jsxBracketSameLine": false, - "tabWidth": 4 -} diff --git a/README.md b/README.md index 27683eac..a3a0ef89 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,30 @@ ![congress dot dev](https://github.com/mustyoshi/congress-dev/raw/master/.github/banner.png "Congress.dev") -[![forthebadge](https://forthebadge.com/images/badges/made-with-python.svg)](https://forthebadge.com) [![forthebadge](https://forthebadge.com/images/badges/uses-js.svg)](https://forthebadge.com) [![forthebadge](https://forthebadge.com/images/badges/built-with-love.svg)](https://forthebadge.com) +[![forthebadge](https://forthebadge.com/images/badges/made-with-python.svg)](https://forthebadge.com) [![forthebadge](https://forthebadge.com/images/badges/made-with-typescript.svg)](https://forthebadge.com) [![forthebadge](https://forthebadge.com/images/badges/built-with-love.svg)](https://forthebadge.com) --- ## Setup -### Required Software + +### Ubuntu +``` +sudo apt-get update +sudo apt install libpq-dev python3-dev python3.13-venv build-essential gcc gfortran libc6 libxml2-dev libxslt-dev +curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh | bash +curl -fsSL https://get.pnpm.io/install.sh | sh -x + +cd backend/ +python3 -m venv .venv +source .venv/bin/activate +pip3 install -r requirements.txt -r requirements-fastapi.txt -r requirements-test.txt +python3 setup.py develop + +cd ../hillstack +cp .env.example .env +pnpm install +``` + +### Docker #### Docker `>= 19.0` Docker is a set of platform as a service products that use OS-level virtualization to deliver software in packages called containers. @@ -26,8 +45,8 @@ docker-compose -f .docker/docker-compose.yml up -d **Advance Usage** - If you rename [docker-compose.local-example.yml](./.docker/docker-compose.local-example.yml) to `docker-compose.local.yml` you can run this script to use our API instead of running the database yourself. ```bash -chmod +x ./start_local.sh -sh ./start_local.sh +chmod +x ./scripts/start_local.sh +sh ./scripts/start_local.sh ``` ### Loading the database @@ -36,18 +55,10 @@ From the backend folder, you will need to tell it to parse some files before you ```bash docker exec -it docker_parser_api bash -python3 -m billparser.importers.releases -python3 -m billparser.importers.bills +python3 -m congress_parser.importers.releases +python3 -m congress_parser.importers.bills ``` - -A semi up to date postgres dump is available for [download](https://files.congress.dev/congress_beta.backup). - -Assuming you're running the normal docker-compose and an empty database named us_code, you can run this to restore from the backup. -```bash -pg_restore -h localhost -U parser -d us_code -F C us_code_beta.backup -``` - --- ## Contributing diff --git a/backend/alembic/README.md b/backend/.alembic/README.md similarity index 100% rename from backend/alembic/README.md rename to backend/.alembic/README.md diff --git a/backend/alembic/env.py b/backend/.alembic/env.py similarity index 96% rename from backend/alembic/env.py rename to backend/.alembic/env.py index 02108c2b..0ada16bc 100644 --- a/backend/alembic/env.py +++ b/backend/.alembic/env.py @@ -5,7 +5,7 @@ from alembic import context -from billparser.db.models import Base, AppropriationsBase, PromptsBase, SensitiveBase, AuthenticationBase +from congress_db.models import Base, AppropriationsBase, PromptsBase, SensitiveBase, AuthenticationBase # this is the Alembic Config object, which provides # access to the values within the .ini file in use. diff --git a/backend/alembic/script.py.mako b/backend/.alembic/script.py.mako similarity index 100% rename from backend/alembic/script.py.mako rename to backend/.alembic/script.py.mako diff --git a/backend/alembic/versions/3749f666c0e6_create_sensitive_user.py b/backend/.alembic/versions/3749f666c0e6_create_sensitive_user.py similarity index 100% rename from backend/alembic/versions/3749f666c0e6_create_sensitive_user.py rename to backend/.alembic/versions/3749f666c0e6_create_sensitive_user.py diff --git a/backend/alembic/versions/3e1b4b7108bb_add_llm_query_table.py b/backend/.alembic/versions/3e1b4b7108bb_add_llm_query_table.py similarity index 100% rename from backend/alembic/versions/3e1b4b7108bb_add_llm_query_table.py rename to backend/.alembic/versions/3e1b4b7108bb_add_llm_query_table.py diff --git a/backend/alembic/versions/45716515aad4_bioguide_social_job.py b/backend/.alembic/versions/45716515aad4_bioguide_social_job.py similarity index 100% rename from backend/alembic/versions/45716515aad4_bioguide_social_job.py rename to backend/.alembic/versions/45716515aad4_bioguide_social_job.py diff --git a/backend/alembic/versions/581b84b38238_.py b/backend/.alembic/versions/581b84b38238_.py similarity index 100% rename from backend/alembic/versions/581b84b38238_.py rename to backend/.alembic/versions/581b84b38238_.py diff --git a/backend/alembic/versions/79a29914ef4a_votes.py b/backend/.alembic/versions/79a29914ef4a_votes.py similarity index 100% rename from backend/alembic/versions/79a29914ef4a_votes.py rename to backend/.alembic/versions/79a29914ef4a_votes.py diff --git a/backend/alembic/versions/82a79ee10856_link_appropriaton_to_prompt.py b/backend/.alembic/versions/82a79ee10856_link_appropriaton_to_prompt.py similarity index 100% rename from backend/alembic/versions/82a79ee10856_link_appropriaton_to_prompt.py rename to backend/.alembic/versions/82a79ee10856_link_appropriaton_to_prompt.py diff --git a/backend/alembic/versions/8da0e1e71536_bill_tags.py b/backend/.alembic/versions/8da0e1e71536_bill_tags.py similarity index 100% rename from backend/alembic/versions/8da0e1e71536_bill_tags.py rename to backend/.alembic/versions/8da0e1e71536_bill_tags.py diff --git a/backend/alembic/versions/92a7b9f03f89_add_legis_action.py b/backend/.alembic/versions/92a7b9f03f89_add_legis_action.py similarity index 100% rename from backend/alembic/versions/92a7b9f03f89_add_legis_action.py rename to backend/.alembic/versions/92a7b9f03f89_add_legis_action.py diff --git a/backend/alembic/versions/a3b78ac73761_create_summary_table.py b/backend/.alembic/versions/a3b78ac73761_create_summary_table.py similarity index 100% rename from backend/alembic/versions/a3b78ac73761_create_summary_table.py rename to backend/.alembic/versions/a3b78ac73761_create_summary_table.py diff --git a/backend/alembic/versions/af9ab9994117_add_congress_to_uniq_bill.py b/backend/.alembic/versions/af9ab9994117_add_congress_to_uniq_bill.py similarity index 100% rename from backend/alembic/versions/af9ab9994117_add_congress_to_uniq_bill.py rename to backend/.alembic/versions/af9ab9994117_add_congress_to_uniq_bill.py diff --git a/backend/alembic/versions/b1fe847bfdea_cascade_deletes.py b/backend/.alembic/versions/b1fe847bfdea_cascade_deletes.py similarity index 100% rename from backend/alembic/versions/b1fe847bfdea_cascade_deletes.py rename to backend/.alembic/versions/b1fe847bfdea_cascade_deletes.py diff --git a/backend/alembic/versions/b3b426df69a6_votes_datetime.py b/backend/.alembic/versions/b3b426df69a6_votes_datetime.py similarity index 100% rename from backend/alembic/versions/b3b426df69a6_votes_datetime.py rename to backend/.alembic/versions/b3b426df69a6_votes_datetime.py diff --git a/backend/alembic/versions/bc4ec35d0874_add_model_column.py b/backend/.alembic/versions/bc4ec35d0874_add_model_column.py similarity index 100% rename from backend/alembic/versions/bc4ec35d0874_add_model_column.py rename to backend/.alembic/versions/bc4ec35d0874_add_model_column.py diff --git a/backend/alembic/versions/bf2269ea67a1_action_parse_table.py b/backend/.alembic/versions/bf2269ea67a1_action_parse_table.py similarity index 95% rename from backend/alembic/versions/bf2269ea67a1_action_parse_table.py rename to backend/.alembic/versions/bf2269ea67a1_action_parse_table.py index cc1d4509..f7b94324 100644 --- a/backend/alembic/versions/bf2269ea67a1_action_parse_table.py +++ b/backend/.alembic/versions/bf2269ea67a1_action_parse_table.py @@ -13,7 +13,7 @@ from sqlalchemy.dialects import postgresql from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy import Text -import billparser +import congress_db # revision identifiers, used by Alembic. revision: str = "bf2269ea67a1" @@ -32,12 +32,12 @@ def upgrade() -> None: sa.Column("legislation_content_id", sa.Integer(), nullable=True), sa.Column( "actions", - billparser.db.models.CastingArray(JSONB(astext_type=Text())), + congress_db.models.CastingArray(JSONB(astext_type=Text())), nullable=True, ), sa.Column( "citations", - billparser.db.models.CastingArray(JSONB(astext_type=Text())), + congress_db.models.CastingArray(JSONB(astext_type=Text())), nullable=True, ), sa.ForeignKeyConstraint( diff --git a/backend/alembic/versions/c54322bdb307_votes_metadata.py b/backend/.alembic/versions/c54322bdb307_votes_metadata.py similarity index 100% rename from backend/alembic/versions/c54322bdb307_votes_metadata.py rename to backend/.alembic/versions/c54322bdb307_votes_metadata.py diff --git a/backend/alembic/versions/c941abf22042_add_usc_tracking.py b/backend/.alembic/versions/c941abf22042_add_usc_tracking.py similarity index 100% rename from backend/alembic/versions/c941abf22042_add_usc_tracking.py rename to backend/.alembic/versions/c941abf22042_add_usc_tracking.py diff --git a/backend/alembic/versions/c9d7e37be069_fix_cascade_deletes.py b/backend/.alembic/versions/c9d7e37be069_fix_cascade_deletes.py similarity index 100% rename from backend/alembic/versions/c9d7e37be069_fix_cascade_deletes.py rename to backend/.alembic/versions/c9d7e37be069_fix_cascade_deletes.py diff --git a/backend/alembic/versions/d01322760f6d_bioguide.py b/backend/.alembic/versions/d01322760f6d_bioguide.py similarity index 100% rename from backend/alembic/versions/d01322760f6d_bioguide.py rename to backend/.alembic/versions/d01322760f6d_bioguide.py diff --git a/backend/alembic/versions/ea85413bf51b_committee_upgrade.py b/backend/.alembic/versions/ea85413bf51b_committee_upgrade.py similarity index 100% rename from backend/alembic/versions/ea85413bf51b_committee_upgrade.py rename to backend/.alembic/versions/ea85413bf51b_committee_upgrade.py diff --git a/backend/alembic/versions/f141b3473b1f_auth_schema_and_nullable_fix.py b/backend/.alembic/versions/f141b3473b1f_auth_schema_and_nullable_fix.py similarity index 100% rename from backend/alembic/versions/f141b3473b1f_auth_schema_and_nullable_fix.py rename to backend/.alembic/versions/f141b3473b1f_auth_schema_and_nullable_fix.py diff --git a/backend/alembic/versions/f6488f13146c_initial_migration.py b/backend/.alembic/versions/f6488f13146c_initial_migration.py similarity index 99% rename from backend/alembic/versions/f6488f13146c_initial_migration.py rename to backend/.alembic/versions/f6488f13146c_initial_migration.py index c901b3af..67243bca 100644 --- a/backend/alembic/versions/f6488f13146c_initial_migration.py +++ b/backend/.alembic/versions/f6488f13146c_initial_migration.py @@ -13,7 +13,7 @@ from sqlalchemy.dialects import postgresql from sqlalchemy.schema import CreateSchema from sqlalchemy import Text -import billparser +import congress_db # revision identifiers, used by Alembic. revision: str = 'f6488f13146c' @@ -226,7 +226,7 @@ def upgrade() -> None: sa.Column('heading', sa.String(), nullable=True), sa.Column('content_str', sa.String(), nullable=True), sa.Column('content_type', sa.String(), nullable=True), - sa.Column('action_parse', billparser.db.models.CastingArray(JSONB(astext_type=Text())), nullable=True), + sa.Column('action_parse', congress_db.models.CastingArray(JSONB(astext_type=Text())), nullable=True), sa.Column('legislation_version_id', sa.Integer(), nullable=True), sa.ForeignKeyConstraint(['legislation_version_id'], ['legislation_version.legislation_version_id'], ondelete='CASCADE'), sa.ForeignKeyConstraint(['parent_id'], ['legislation_content.legislation_content_id'], ), diff --git a/backend/.docker/Dockerfile b/backend/.docker/Dockerfile index 1108bd7e..4863f3b2 100644 --- a/backend/.docker/Dockerfile +++ b/backend/.docker/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.9-slim +FROM python:3.13-slim RUN apt update RUN apt install wget build-essential -y @@ -9,4 +9,4 @@ RUN pip3 install -r requirements.txt COPY . /usr/src/app WORKDIR /usr/src/app -ENTRYPOINT ["python3", "-m", "billparser"] \ No newline at end of file +ENTRYPOINT ["sh"] \ No newline at end of file diff --git a/backend/.docker/Dockerfile.fastapi b/backend/.docker/Dockerfile.fastapi deleted file mode 100644 index 449bb29e..00000000 --- a/backend/.docker/Dockerfile.fastapi +++ /dev/null @@ -1,13 +0,0 @@ -FROM python:3.9-slim - -RUN apt update -RUN apt install wget build-essential -y - -COPY requirements-fastapi.txt . -RUN pip3 install -r requirements-fastapi.txt - -COPY . /usr/src/app -WORKDIR /usr/src/app - -ENTRYPOINT ["uvicorn"] -CMD ["congress_fastapi.app:app", "--host", "0.0.0.0", "--port", "8080"] \ No newline at end of file diff --git a/backend/.gitignore b/backend/.gitignore index e97bb357..eddb7cbc 100644 --- a/backend/.gitignore +++ b/backend/.gitignore @@ -3,14 +3,21 @@ **.pyc **.zip + *.code-workspace bills/ notebooks/ usc/ .vscode/ *.ipynb +.pytest_cache +__pycache__ +**.pyc **.egg-info/ dist/ build/ -.sources/ \ No newline at end of file +.sources/ + +.venv +venv \ No newline at end of file diff --git a/backend/.python-version b/backend/.python-version deleted file mode 100644 index bd28b9c5..00000000 --- a/backend/.python-version +++ /dev/null @@ -1 +0,0 @@ -3.9 diff --git a/backend/alembic.ini b/backend/alembic.ini index 665ef8ed..9995faa5 100644 --- a/backend/alembic.ini +++ b/backend/alembic.ini @@ -2,7 +2,7 @@ [alembic] # path to migration scripts -script_location = alembic +script_location = .alembic # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s # Uncomment the line below if you want the files to be prepended with date and time diff --git a/backend/billparser/__init__.py b/backend/billparser/__init__.py deleted file mode 100644 index e3ad0b32..00000000 --- a/backend/billparser/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from billparser.utils.logger import initialize_logger - -initialize_logger() \ No newline at end of file diff --git a/backend/billparser/db/queries.py b/backend/billparser/db/queries.py deleted file mode 100644 index e25e0dcc..00000000 --- a/backend/billparser/db/queries.py +++ /dev/null @@ -1,365 +0,0 @@ -from flask_sqlalchemy_session import current_session -from billparser.db.models import ( - USCChapter, - USCSection, - USCContent, - USCContentDiff, - Legislation, - LegislationVersion, - LegislationContent, - LegislationChamber, - LegislationVersionEnum, - LegislationActionParse, - Version, -) -from cachetools import cached, TTLCache -from sqlalchemy import or_, String, func -from sqlalchemy.sql.expression import cast -from sqlalchemy.sql import alias -import re - -from typing import Dict, List - -import platform - -windows = platform.system() == "Windows" - -DEFAULT_VERSION_ID = 1 -CACHE_TIME = 600 - -if windows: - CACHE_TIME = 0 - - -@cached(cache=TTLCache(maxsize=512, ttl=CACHE_TIME)) -def get_chapters(version_id=DEFAULT_VERSION_ID) -> List[USCChapter]: - """ - Gets all the Chapters for the current version - - Returns: - List[USCChapter]: A list of all the - """ - latest_base = get_latest_base() - results = ( - current_session.query(USCChapter) - .filter(USCChapter.version_id == latest_base.version_id) - .all() - ) - return results - - -@cached(cache=TTLCache(maxsize=512, ttl=CACHE_TIME)) -def get_bills( - house: int, senate: int, query: str, incl: str, decl: str -) -> List[Legislation]: - """ - Gets the Bill rows according to the filters. - - Args: - house (int): Include House bills - senate (int): Include Senate bills - query (str): Text to search for in the title - incl (str): Versions to include - decl (str): Versions to exclude - - Returns: - List[Legislation]: Bill objects that pass the above filters - """ - results = current_session.query(Legislation).join(LegislationVersion) - if house != 1: - results = results.filter(Legislation.chamber != LegislationChamber.House) - if senate != 1: - results = results.filter(Legislation.chamber != LegislationChamber.Senate) - if len(query) > 0: - results = results.filter( - or_( - Legislation.title.ilike(f"%{query}%"), - cast(Legislation.number, String).like( - re.sub(r"[^0-9\s]", "", query).strip() - ), - ) - ) - if incl != "": - incl_set = [ - LegislationVersionEnum(x.upper()) - for x in incl.split(",") - if x.upper() in LegislationVersionEnum.__members__ - ] - results = results.filter(LegislationVersion.legislation_version.in_(incl_set)) - if decl != "": - decl_set = [ - LegislationVersionEnum(x.upper()) - for x in decl.split(",") - if x.upper() in LegislationVersionEnum.__members__ - ] - results = results.filter(~LegislationVersion.legislation_version.in_(decl_set)) - results = results.order_by(Legislation.number).limit(100).all() - return results - - -@cached(cache=TTLCache(maxsize=512, ttl=CACHE_TIME)) -def get_versions() -> List[Version]: - """ - Gets a list of all the Version rows that correspond to Bills - - Returns: - List[Version]: List of Versions corresponding to the Bill versions - """ - results = current_session.query(Version).filter(Version.base_id is not None).all() - return results - - -@cached(cache=TTLCache(maxsize=512, ttl=CACHE_TIME)) -def get_revisions() -> List[Version]: - """ - Gets a list of all the Version rows that correspond to USCode revisions - - Returns: - List[Version]: List of Versions that are USCode revisions - """ - results = current_session.query(Version).filter(Version.base_id is None).all() - return results - - -@cached(cache=TTLCache(maxsize=512, ttl=CACHE_TIME)) -def get_latest_sections(chapter_number: str) -> List[USCSection]: - """ - Gets the sections for the given Chapter, from the first USCode revision in the table - - Args: - chapter_number (str): Given Chapter.number to look for - - Returns: - List[USCSection]: List of Sections from the given Chapter - """ - latest_base = ( - current_session.query(Version).filter(Version.base_id == None).all()[0] - ) - chapter = ( - current_session.query(USCChapter) - .filter(USCChapter.version_id == latest_base.version_id) - .filter(USCChapter.short_title == chapter_number) - .first() - ) - results = ( - current_session.query(USCSection) - .filter(USCSection.usc_chapter_id == chapter.usc_chapter_id) - .all() - ) - return results - - -@cached(cache=TTLCache(maxsize=512, ttl=CACHE_TIME)) -def get_sections(chapter_id: int, version_id: int) -> List[USCSection]: - """ - Gets the sections from the chapter and version id - - Args: - chapter_id (int): Chapter id to look at - version_id (int): Version id to look at - - Returns: - List[USCSection]: List of sections that match the criteria - """ - results = ( - current_session.query(USCSection) - .filter( - USCSection.usc_chapter_id == chapter_id, USCSection.version_id == version_id - ) - .all() - ) - return results - - -@cached(cache=TTLCache(maxsize=512, ttl=CACHE_TIME)) -def get_latest_content(chapter_number: str, section_number: str) -> List[USCContent]: - """ - Converts a chapter number and section number into chapter and version ids - Then calls the get_content function with those arguments - - Args: - chapter_number (str): The Chapter number to search for - section_number (str): The Section number to search for - - Returns: - List[USCContent]: List of Contents from the given section - """ - latest_base = get_latest_base() - chapter = ( - current_session.query(USCChapter) - .filter(USCChapter.version_id == latest_base.version_id) - .filter(USCChapter.short_title == chapter_number) - .first() - ) - section = ( - current_session.query(USCSection) - .filter(USCSection.version_id == latest_base.version_id) - .filter(USCSection.number == section_number) - .filter(USCSection.usc_chapter_id == chapter.usc_chapter_id) - .first() - ) - return get_content(section.usc_section_id, latest_base.version_id) - - -@cached(cache=TTLCache(maxsize=512, ttl=CACHE_TIME)) -def get_content(section_id: int, version_id: int) -> List[USCContent]: - """ - Gets the contents of a given Section in a given Version - - TODO: Is version id redundent here? - - Args: - section_id (int): Section id to look at - version_id (int): Version id to look at - - Returns: - List[USCContent]: Content that passes the above filter - """ - results = ( - current_session.query(USCContent) - .filter( - USCContent.usc_section_id == section_id, USCContent.version_id == version_id - ) - .order_by(USCContent.order_number.asc()) - .all() - ) - return results - - -# TODO: Need to fix -@cached(cache=TTLCache(maxsize=512, ttl=CACHE_TIME)) -def get_content_versions(bill_version_id: int) -> List[USCContent]: - """ - Returns the content versions for a given bill version id - - Args: - bill_version_id (int): Given bill version id - - Returns: - List[Content]: List of Content that corresponds to a given Bill - """ - results = ( - current_session.query(USCContent) - .filter( - USCContent.version_id == bill_version_id, - ) - .all() - ) - return results - - -@cached(cache=TTLCache(maxsize=512, ttl=CACHE_TIME)) -def get_diffs(bill_version_id: int) -> List[USCContentDiff]: - """ - Gets the USCContentDiff for a given bill_version_id - - Args: - bill_version_id (int): Target bill version id - - Returns: - List[USCContentDiff]: List of USCContentDiff for the bill version - """ - legis_vers = ( - current_session.query(LegislationVersion) - .filter(LegislationVersion.legislation_version_id == bill_version_id) - .all() - ) - if len(legis_vers) == 0: - return [] - results = ( - current_session.query(USCContentDiff) - .filter(USCContentDiff.version_id == legis_vers[0].version_id) - .all() - ) - return results - - -@cached(cache=TTLCache(maxsize=512, ttl=CACHE_TIME)) -def get_bill_contents(bill_version_id: int) -> List[LegislationContent]: - """ - Returns the LegislationContent for a given bill_version - - Args: - bill_version_id (int): LegislationContent PK on the LegislationContent table - - Returns: - List[LegislationContent]: Matching LegislationContent rows - """ - results = ( - current_session.query(LegislationContent) - .filter(LegislationContent.legislation_version_id == bill_version_id) - .all() - ) - return results - - -@cached(cache=TTLCache(maxsize=512, ttl=CACHE_TIME)) -def get_bill_metadata(bill_version_id: int) -> dict: - bill_version = ( - current_session.query(LegislationVersion) - .filter(LegislationVersion.legislation_version_id == bill_version_id) - .all() - ) - if len(bill_version) > 0: - bill = ( - current_session.query(Legislation) - .filter(Legislation.legislation_id == bill_version[0].legislation_id) - .all() - ) - if len(bill) > 0: - return { - "chamber": bill[0].chamber.value, - "number": bill[0].number, - "version": bill_version[0].legislation_version.value.lower(), - } - - return {} - - -def get_revision_diff(base_id: int, new_id: int): - # Gets the diffs between two versions - # SELECT t1.section_id as old_s_id, t2.section_id as new_s_id, t1.usc_ident, t1.section_display as old_s_d, t2.section_display as new_s_d, t1.heading, t2.heading FROM sections as t1 JOIN sections as t2 ON t2.version_id = 2 WHERE t1.version_id = 1 AND t1.usc_ident = t2.usc_ident AND (t1.heading != t2.heading) LIMIT 5000; - new_sections: Section = Section.alias("new") - old_sections: Section = Section.alias("old") - results = ( - current_session.query(new_sections.usc_section_id, old_sections.usc_section_id) - .join(old_sections, old_sections.version_id == new_id) - .filter(new_sections.version_id == base_id) - .filter(old_sections.usc_ident == new_sections.usc_ident) - .filter(old_sections.heading != new_sections.heading) - ).all() - return results - - -def get_latest_base() -> Version: - try: - return current_session.query(Version).filter(Version.base_id == None).all()[0] - except Exception: - return None - - -def check_for_action_parses(legislation_version_id: List[int]) -> Dict[int, int]: - """ - Return a dict of the number of action parses for each legislation_version_id - """ - - results = ( - current_session.query( - LegislationActionParse.legislation_version_id, - func.count(LegislationActionParse.legislation_action_parse_id), - ) - .filter( - LegislationActionParse.legislation_version_id.in_(legislation_version_id) - ) - .group_by(LegislationActionParse.legislation_version_id) - .all() - ) - return {x[0]: x[1] for x in results} - - -def get_legislation_versions() -> List[LegislationVersion]: - """ - Returns a list of all legislation_version_ids - """ - results = current_session.query(LegislationVersion).all() - return [x[0] for x in results] \ No newline at end of file diff --git a/backend/billparser/importers/releases.py b/backend/billparser/importers/releases.py deleted file mode 100644 index 195b5a70..00000000 --- a/backend/billparser/importers/releases.py +++ /dev/null @@ -1,131 +0,0 @@ -import argparse -import html -import os -import zipfile -from datetime import datetime -from billparser.importers.bills import download_path -from joblib import Parallel, delayed -import requests -from sqlalchemy import func -from billparser.db.handler import import_title, get_number, Session -from billparser.db.models import USCRelease, Version - -THREADS = int(os.environ.get("PARSE_THREADS", -1)) -DOWNLOAD_BASE = "https://uscode.house.gov/download/{}" -RELEASE_POINTS = "https://uscode.house.gov/download/priorreleasepoints.htm" - - -def main(): - parser = argparse.ArgumentParser(description="Process release points.") - parser.add_argument( - "--release-point", - type=str, - help="URL of the zip file to process a single release point", - ) - args = parser.parse_args() - - if args.release_point: - process_single_release_point(args.release_point) - else: - process_all_release_points() - - -def process_single_release_point(url, release=None): - zip_file_path = download_path(url) - with zipfile.ZipFile(zip_file_path) as zip_file: - if release is None: - session = Session() - new_version = Version(base_id=None) - session.add(new_version) - session.flush() - release = USCRelease( - short_title=zip_file_path.split("/")[-1].split(".")[0], - effective_date=datetime.now(), - long_title="", - version_id=new_version.version_id, - ) - session.add(release) - session.commit() - files = zip_file.namelist() - files = sorted( - files, key=lambda x: get_number(x.split(".")[0].replace("usc", "")) - ) - Parallel(n_jobs=THREADS, verbose=5, backend="loky")( - delayed(import_title)( - zip_file.open(file).read(), - file.split(".")[0].replace("usc", ""), - None, # Assuming title is not needed for single release point - release.to_dict(), # Assuming release_point.to_dict() is not needed for single release point - ) - for file in files - ) - - -def process_all_release_points(): - release_points = [] - response = requests.get(RELEASE_POINTS) - tree = html.fromstring(response.content) - - for year in range(2022, datetime.now().year, 2): - search_date = f"12/21/{year}" - links = tree.xpath(f'//a[contains(text(), "{search_date}")]/@href') - - if len(links) > 0: - link = links[0].replace("usc-rp", "xml_uscAll").replace(".htm", ".zip") - zipPath = DOWNLOAD_BASE.format(link) - match = re.search(r"@(\d+)-(\d+)\.zip", link) - - release_points.append( - { - "date": search_date, - "short_title": f"Public Law {match.group(1)}-{match.group(2)}", - "long_title": "", - "url": zipPath, - } - ) - - session = Session() - for rp in release_points: - existing_rp = ( - session.query(USCRelease) - .filter( - USCRelease.short_title == rp.get("short_title"), - func.date(USCRelease.effective_date) - == datetime.strptime(rp.get("date"), "%m/%d/%Y"), - ) - .all() - ) - if len(existing_rp) > 0: - print("Already in DB - Skipping") - continue - new_version = Version(base_id=None) - session.add(new_version) - session.commit() - release_point = USCRelease( - short_title=rp.get("short_title"), - effective_date=datetime.strptime(rp.get("date"), "%m/%d/%Y"), - long_title=rp.get("long_title"), - version_id=new_version.version_id, - ) - session.add(release_point) - session.commit() - zip_file_path = download_path(rp.get("url")) - with zipfile.ZipFile(f"usc/{zip_file_path}") as zip_file: - files = zip_file.namelist() - - files = sorted( - files, key=lambda x: get_number(x.split(".")[0].replace("usc", "")) - ) - Parallel(n_jobs=THREADS, verbose=5, backend="multiprocessing")( - delayed(import_title)( - zip_file.open(file).read(), - file.split(".")[0].replace("usc", ""), - rp.get("title"), - release_point.to_dict(), - ) - for file in files # if "09" in file - ) - - -if __name__ == "__main__": - main() diff --git a/backend/billparser/nightly.py b/backend/billparser/nightly.py deleted file mode 100644 index 036a52bf..00000000 --- a/backend/billparser/nightly.py +++ /dev/null @@ -1,9 +0,0 @@ -from billparser.downloader import download -from billparser.run_through import run_archives -from billparser.prune import run_prune - -# TODO: Put this back onto a cron job -if __name__ == "__main__": - download() - run_archives() - run_prune() diff --git a/backend/billparser/tests/routes.py b/backend/billparser/tests/routes.py deleted file mode 100644 index 67fae855..00000000 --- a/backend/billparser/tests/routes.py +++ /dev/null @@ -1,625 +0,0 @@ -from unittest import TestCase, mock -import json -from billparser.__main__ import ( - bills, - app, - bill_content, - bill_content_tree, - titles, - versions, - revisions, - version, - latest_sections, - sections, - contents, -) - -from billparser.db.models import ( - USCChapter, - USCSection, - USCContent, - USCContentDiff, - Version, - Legislation, - LegislationVersion, - LegislationContent, - LegislationVersionEnum, - LegislationChamber, - LegislationType, -) - -# This is to ensure that the return values are the same -# No matter what, these return values shouldn't change, or the frontend -# Will need to change -class TestRoutes(TestCase): - @mock.patch("billparser.__main__.get_bills", return_value=[]) - def test_bills_no_version(self, mock_get_bills): - """ - Should be returning a dict where the key is the bill, and the value is the bill metadata - """ - mock_get_bills.return_value = [ - Legislation( - legislation_id=1, - legislation_type=LegislationType.Bill, - chamber=LegislationChamber.House, - title="Test House Bill", - number=1 - ), - Legislation( - legislation_id=2, - legislation_type=LegislationType.Bill, - chamber=LegislationChamber.Senate, - title="Test Senate Bill", - number=5 - ), - ] - with app.app.test_request_context(): - resp = bills() - self.assertEqual( - json.dumps( - { - "H-1": { - "bill_id": "1", - "chamber": "House", - "bill_type": "BillTypes.Bill", - "bill_number": "1", - "bill_title": "Test House Bill", - "versions": [], - }, - "S-5": { - "bill_id": "2", - "chamber": "Senate", - "bill_type": "BillTypes.Bill", - "bill_number": "5", - "bill_title": "Test Senate Bill", - "versions": [], - }, - } - ), - resp, - ) - - @mock.patch("billparser.__main__.get_bills", return_value=[]) - def test_bills_with_version(self, mock_get_bills): - """ - Should be returning a dict where the key is the bill, and the value is the bill metadata - Should also include the given bill versions for the bill - """ - - - mock_get_bills.return_value = [ - Legislation( - legislation_id=1, - legislation_type=LegislationType.Bill, - chamber=LegislationChamber.House, - title="Test House Bill", - number=1, - versions=[ - LegislationVersion( - legislation_version_id=1, - legislation_id=1, - legislation_version=LegislationVersionEnum.IH, - ) - ], - ), - Legislation( - legislation_id=2, - legislation_type=LegislationType.Bill, - chamber=LegislationChamber.Senate, - title="Test Senate Bill", - number=5, - versions=[ - LegislationVersion( - legislation_version_id=2, - legislation_id=2, - legislation_version=LegislationVersionEnum.IS, - ) - ], - ), - ] - with app.app.test_request_context(): - resp = bills() - self.assertEqual( - json.dumps( - { - "H-1": { - "bill_id": "1", - "chamber": "House", - "bill_type": "BillTypes.Bill", - "bill_number": "1", - "bill_title": "Test House Bill", - "versions": [ - { - "bill_version_id": "1", - "bill_id": "1", - "bill_version": "ih", - # "base_version_id": "1", # This was removed in the translation - } - ], - }, - "S-5": { - "bill_id": "2", - "chamber": "Senate", - "bill_type": "BillTypes.Bill", - "bill_number": "5", - "bill_title": "Test Senate Bill", - "versions": [ - { - "bill_version_id": "2", - "bill_id": "2", - "bill_version": "is", - # "base_version_id": "1", # This was removed in the translation - } - ], - }, - } - ), - resp, - resp, - ) - - @mock.patch("billparser.__main__.get_bill_contents", return_value=[]) - def test_bill_content_1(self, mock_get_bill_contents): - """ - Should return the bill content objects - """ - mock_get_bill_contents.return_value = [ - LegislationContent( - legislation_content_id=1, - parent_id=None, - order_number=0, - section_display="SS 1.)", - heading="Test heading", - content_str="Test content", - legislation_version_id=1, - content_type="section", - action_parse=[], - ), - ] - with app.app.test_request_context(): - resp = bill_content("1") - self.assertEqual( - json.dumps( - [ - { - "bill_content_id": 1, - "content_type": "section", - "order": 0, - # "number": "1", # Removed - "display": "SS 1.)", - "heading": "Test heading", - "content": "Test content", - "version": "1", - } - ] - ), - resp, - resp, - ) - - @mock.patch("billparser.__main__.get_bill_contents", return_value=[]) - def test_bill_content_2(self, mock_get_bill_contents): - """ - Should return the bill content objects, multiple contents - """ - self.maxDiff = None - mock_get_bill_contents.return_value = [ - LegislationContent( - legislation_content_id=1, - parent_id=None, - order_number=0, - section_display="SS 1.)", - heading="Test heading", - content_str="Test content", - legislation_version_id=1, - content_type="section", - action_parse=[], - ), - LegislationContent( - legislation_content_id=2, - parent_id="1", - order_number=0, - section_display="a.)", - heading="", - content_str="Test subcontent", - legislation_version_id=1, - content_type="legis-body", - action_parse=[], - ), - ] - with app.app.test_request_context(): - resp = bill_content("1") - self.assertEqual( - json.dumps( - [ - { - "bill_content_id": 1, - "content_type": "section", - "order": 0, - # "number": "1", # Removed - "display": "SS 1.)", - "heading": "Test heading", - "content": "Test content", - "version": "1", - }, - { - "bill_content_id": 2, - "content_type": "legis-body", - "order": 0, - "parent": "1", - # "number": "a", # Removed - "display": "a.)", - "heading": "", - "content": "Test subcontent", - "version": "1", - }, - ] - ), - resp, - resp, - ) - - @mock.patch("billparser.__main__.get_bill_metadata", return_value=[]) - @mock.patch("billparser.__main__.get_bill_contents", return_value=[]) - def test_bill_content_tree_1(self, mock_get_bill_contents, mock_get_bill_metadata): - """ - Should return the bill content objects, and metadata - """ - mock_get_bill_metadata.return_value = { - "chamber": "House", - "number": "12", - "version": "1", - } - mock_get_bill_contents.return_value = [ - LegislationContent( - legislation_content_id=1, - parent_id=None, - order_number=0, - section_display="SS 1.)", - heading="Test heading", - content_str="Test content", - legislation_version_id=1, - content_type="section", - action_parse=[], - ), - LegislationContent( - legislation_content_id=2, - parent_id=1, - order_number=0, - section_display="a.)", - heading="", - content_str="Test subcontent", - legislation_version_id=1, - content_type="legis-body", - action_parse=[], - ), - ] - with app.app.test_request_context(): - resp = bill_content_tree("1") - self.assertEqual( - json.dumps( - { - "content": { - "bill_content_id": 1, - "content_type": "section", - "order": 0, - #"number": "1", # Removed - "display": "SS 1.)", - "heading": "Test heading", - "content": "Test content", - "version": "1", - "child": [ - { - "bill_content_id": 2, - "content_type": "legis-body", - "order": 0, - "parent": 1, - # "number": "a", # Removed - "display": "a.)", - "heading": "", - "content": "Test subcontent", - "version": "1", - "child": [], - } - ], - }, - "metadata": { - "chamber": "House", - "number": "12", - "version": "1", - }, - } - ), - resp, - resp, - ) - - @mock.patch("billparser.__main__.get_chapters", return_value=[]) - def test_chapters(self, mock_get_chapters): - """ - Should return the chapter objects - """ - mock_get_chapters.return_value = [ - USCChapter( - usc_chapter_id=1, - usc_ident="/usc/1", - short_title="01", - document="usc", - version_id=1, - ) - ] - with app.app.test_request_context(): - resp = titles() - self.assertEqual( - json.dumps( - [{"chapter_id": 1, "ident": "/usc/1", "number": "01", "version": 1}] - ), - resp, - resp, - ) - - @mock.patch("billparser.__main__.get_versions", return_value=[]) - def test_versions(self, mock_get_versions): - """ - Should return the version objects - """ - mock_get_versions.return_value = [ - Version(version_id=1, base_id=1) - ] - with app.app.test_request_context(): - resp = versions() - self.assertEqual( - json.dumps([{"version_id": 1, "title": "Legacy Title", "base_id": 1}]), - resp, - resp, - ) - - @mock.patch("billparser.__main__.get_revisions", return_value=[]) - def test_revisions(self, mock_get_versions): - """ - Should return the version objects without a base id - """ - mock_get_versions.return_value = [ - Version( - version_id=1, base_id=None - ) - ] - with app.app.test_request_context(): - resp = revisions() - self.assertEqual( - json.dumps([{"version_id": 1, "title": "Legacy Title"}]), resp, resp, - ) - - @mock.patch("billparser.__main__.get_content_versions", return_value=[]) - @mock.patch("billparser.__main__.get_diffs", return_value=[]) - def test_get_version(self, mock_get_diffs, mock_get_content_versions): - """ - Should return the version objects without a base id - """ - mock_get_content_versions.return_value = [ - USCContent( - usc_content_id=1, - usc_section_id=1, - parent_id=None, - usc_ident="/usc/s1/1", - usc_guid="1-2-3", - number="1", - section_display="S 1.)", - heading="Test - heading", - content_str="content - str", - version_id=1, - ) - ] - mock_get_diffs.return_value = [ - USCContentDiff( - usc_content_diff_id=1, - usc_chapter_id=1, - usc_section_id=1, - usc_content_id=1, - order_number=0, - number="1", - section_display="test", - heading="test - heading", - content_str="test - content", - version_id=1, - ) - ] - with app.app.test_request_context(json={"version": 1}): - resp = version() - self.assertEqual( - json.dumps( - { - "diffs": [ - { - "id": 1, - "content_id": 1, - "section_id": 1, - "chapter_id": 1, - "order": 0, - "number": "1", - "display": "test", - "heading": "test - heading", - "content": "test - content", - "version": 1, - } - ], - "contents": [ - { - "content_id": 1, - "section_id": 1, - "ident": "/usc/s1/1", - "number": "1", - "display": "S 1.)", - "heading": "Test - heading", - "content": "content - str", - "version": 1, - } - ], - } - ), - resp, - resp, - ) - - @mock.patch("billparser.__main__.get_latest_sections", return_value=[]) - def test_latest_sections(self, mock_get_sections): - """ - Should return the section objects - """ - mock_get_sections.return_value = [ - USCSection( - usc_section_id=1, - usc_ident="/usc/01/s1", - number="1", - section_display="S 1.)", - heading="Test - Heading", - usc_chapter_id=1, - version_id=1, - ) - ] - - with app.app.test_request_context(): - resp = latest_sections("1") - self.assertEqual( - json.dumps( - [ - { - "section_id": 1, - "ident": "/usc/01/s1", - "number": "1", - "display": "S 1.)", - "heading": "Test - Heading", - "chapter_id": 1, - "version": 1, - } - ] - ), - resp, - resp, - ) - - @mock.patch("billparser.__main__.get_latest_sections", return_value=[]) - def test_latest_sections(self, mock_get_sections): - """ - Should return the section objects - """ - mock_get_sections.return_value = [ - USCSection( - usc_section_id=1, - usc_ident="/usc/01/s1", - number="1", - section_display="S 1.)", - heading="Test - Heading", - usc_chapter_id=1, - version_id=1, - ) - ] - - with app.app.test_request_context(): - resp = latest_sections("1") - self.assertEqual( - json.dumps( - [ - { - "section_id": 1, - "ident": "/usc/01/s1", - "number": "1", - "display": "S 1.)", - "heading": "Test - Heading", - "chapter_id": 1, - "version": 1, - } - ] - ), - resp, - resp, - ) - - @mock.patch( - "billparser.__main__.get_latest_base", return_value=Version(version_id=1) - ) - @mock.patch("billparser.__main__.get_sections", return_value=[]) - def test_sections(self, mock_get_sections, mock_get_latest_base): - """ - Should return the section objects - """ - mock_get_sections.return_value = [ - USCSection( - usc_section_id=1, - usc_ident="/usc/01/s1", - number="1", - section_display="S 1.)", - heading="Test - Heading", - usc_chapter_id=1, - version_id=1, - ) - ] - - with app.app.test_request_context(): - resp = sections("1") - self.assertEqual( - json.dumps( - [ - { - "section_id": 1, - "ident": "/usc/01/s1", - "number": "1", - "display": "S 1.)", - "heading": "Test - Heading", - "chapter_id": 1, - "version": 1, - } - ] - ), - resp, - resp, - ) - - @mock.patch( - "billparser.__main__.get_latest_base", return_value=Version(version_id=1) - ) - @mock.patch("billparser.__main__.get_content", return_value=[]) - def test_contents(self, mock_get_content, mock_get_latest_base): - """ - Should return the content objects - """ - mock_get_content.return_value = [ - USCContent( - usc_content_id=1, - usc_section_id=1, - parent_id=None, - order_number=0, - usc_ident="/usc/01/s1", - usc_guid="1-2-3", - number="1", - section_display="S 1.)", - heading="Test - Heading", - content_str="Content - Str", - version_id=1, - content_type="legis-body", - ) - ] - - with app.app.test_request_context(): - resp = contents("1") - self.assertEqual( - json.dumps( - [ - { - "content_id": 1, - "content_type": "legis-body", - "section_id": 1, - "order": 0, - "ident": "/usc/01/s1", - "number": "1", - "display": "S 1.)", - "heading": "Test - Heading", - "content": "Content - Str", - "version": 1, - } - ] - ), - resp, - resp, - ) - diff --git a/backend/congress_api/__main__.py b/backend/congress_api/__main__.py index de94f82f..949f3d40 100644 --- a/backend/congress_api/__main__.py +++ b/backend/congress_api/__main__.py @@ -9,9 +9,9 @@ from flask_sqlalchemy_session import current_session, flask_scoped_session from sqlalchemy.orm import sessionmaker -from billparser.db.handler import DATABASE_URI +from congress_db.session import DATABASE_URI from congress_api import encoder -from billparser.utils.logger import initialize_logger +from congress_parser.utils.logger import initialize_logger initialize_logger() CACHE_HEADER_TIME = int(os.environ.get("CACHE_HEADER_TIME", 0)) diff --git a/backend/congress_api/db/chamber_queries.py b/backend/congress_api/db/chamber_queries.py index 82f127c1..c6e3a5a3 100644 --- a/backend/congress_api/db/chamber_queries.py +++ b/backend/congress_api/db/chamber_queries.py @@ -10,7 +10,7 @@ from sqlalchemy.sql.expression import cast from sqlalchemy import Date -from billparser.db.models import ( +from congress_db.models import ( Congress, Legislation, LegislationChamber, diff --git a/backend/congress_api/db/legislation_queries.py b/backend/congress_api/db/legislation_queries.py index 4b041d65..aaed75fb 100644 --- a/backend/congress_api/db/legislation_queries.py +++ b/backend/congress_api/db/legislation_queries.py @@ -8,7 +8,7 @@ from sqlalchemy import distinct from sqlalchemy.sql.functions import func from sqlalchemy.dialects import postgresql -from billparser.db.models import ( +from congress_db.models import ( Congress, Legislation, LegislationContent, diff --git a/backend/congress_api/db/session_queries.py b/backend/congress_api/db/session_queries.py index 8acfbe41..7c7eaad9 100644 --- a/backend/congress_api/db/session_queries.py +++ b/backend/congress_api/db/session_queries.py @@ -4,7 +4,7 @@ from cachetools import TTLCache, cached from flask_sqlalchemy_session import current_session -from billparser.db.models import Congress +from congress_db.models import Congress from congress_api.models.session_metadata import SessionMetadata # noqa: E501 CACHE_TIME = int(os.environ.get("CACHE_TIME", 0)) diff --git a/backend/congress_api/db/uscode_queries.py b/backend/congress_api/db/uscode_queries.py index 6fe26954..bbcbba76 100644 --- a/backend/congress_api/db/uscode_queries.py +++ b/backend/congress_api/db/uscode_queries.py @@ -5,7 +5,7 @@ from flask_sqlalchemy_session import current_session from sqlalchemy import desc -from billparser.db.models import USCChapter, USCContent, USCRelease, USCSection +from congress_db.models import USCChapter, USCContent, USCRelease, USCSection from congress_api.models.release_point_list import ReleasePointList from congress_api.models.release_point_metadata import ReleasePointMetadata from congress_api.models.usc_section_content import USCSectionContent diff --git a/backend/congress_api/test/__init__.py b/backend/congress_api/test/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/backend/congress_api/test/controllers/__init__.py b/backend/congress_api/test/controllers/__init__.py deleted file mode 100644 index a5485b70..00000000 --- a/backend/congress_api/test/controllers/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -import logging - -import connexion -from flask_testing import TestCase - -from congress_api.encoder import JSONEncoder - - -class BaseTestCase(TestCase): - def create_app(self): - logging.getLogger("connexion.operation").setLevel("ERROR") - app = connexion.App(__name__, specification_dir="../openapi/") - app.app.json_encoder = JSONEncoder - app.add_api("openapi.yaml", pythonic_params=True) - return app.app diff --git a/backend/congress_api/test/controllers/test_legislation_controller.py b/backend/congress_api/test/controllers/test_legislation_controller.py deleted file mode 100644 index 1e80c399..00000000 --- a/backend/congress_api/test/controllers/test_legislation_controller.py +++ /dev/null @@ -1,197 +0,0 @@ -# coding: utf-8 - -from __future__ import absolute_import - -import unittest - - -from congress_api.test.controllers import BaseTestCase - - -class TestLegislationController(BaseTestCase): - """LegislationController integration test stubs""" - - def test_get_bill_summary(self): - """Test case for get_bill_summary - - - """ - headers = { - "Accept": "application/json", - } - response = self.client.open( - "/congress/{session}/{chamber_bil}/{bill}".format( - session="116", chamber="chamber_example", bill="bill_example" - ), - method="GET", - headers=headers, - ) - self.assert200(response, "Response body is : " + response.data.decode("utf-8")) - - def test_get_bill_version_amdts(self): - """Test case for get_bill_version_amdts - - - """ - headers = { - "Accept": "application/json", - } - response = self.client.open( - "/congress/{session}/{chamber_bil}/{bill}/{base_version}/amendments/{new_version}".format( - session="116", - chamber="chamber_example", - bill="bill_example", - base_version="base_version_example", - new_version="new_version_example", - ), - method="GET", - headers=headers, - ) - self.assert200(response, "Response body is : " + response.data.decode("utf-8")) - - def test_get_bill_version_diffs(self): - """Test case for get_bill_version_diffs - - - """ - headers = { - "Accept": "application/json", - } - response = self.client.open( - "/congress/{session}/{chamber_bil}/{bill}/{version}/diffs".format( - session="116", - chamber="chamber_example", - bill="bill_example", - version="version_example", - ), - method="GET", - headers=headers, - ) - self.assert200(response, "Response body is : " + response.data.decode("utf-8")) - - def test_get_bill_version_summary(self): - """Test case for get_bill_version_summary - - - """ - headers = { - "Accept": "application/json", - } - response = self.client.open( - "/congress/{session}/{chamber_bil}/{bill}/{version}/summary".format( - session="116", - chamber="chamber_example", - bill="bill_example", - version="version_example", - ), - method="GET", - headers=headers, - ) - self.assert200(response, "Response body is : " + response.data.decode("utf-8")) - - def test_get_bill_version_text(self): - """Test case for get_bill_version_text - - - """ - query_string = [("include_parsed", False)] - headers = { - "Accept": "application/json", - } - response = self.client.open( - "/congress/{session}/{chamber_bil}/{bill}/{version}/text".format( - session="116", - chamber="chamber_example", - bill="bill_example", - version="version_example", - ), - method="GET", - headers=headers, - query_string=query_string, - ) - self.assert200(response, "Response body is : " + response.data.decode("utf-8")) - - def test_get_chamber_bills(self): - """Test case for get_chamber_bills - - - """ - query_string = [("page_size", 25), ("page", 1)] - headers = { - "Accept": "application/json", - } - response = self.client.open( - "/congress/{session}/{chamber_bill}".format( - session="116", chamber="chamber_example" - ), - method="GET", - headers=headers, - query_string=query_string, - ) - self.assert200(response, "Response body is : " + response.data.decode("utf-8")) - - def test_get_chamber_summary(self): - """Test case for get_chamber_summary - - Specific chamber - """ - headers = { - "Accept": "application/json", - } - response = self.client.open( - "/congress/{session}/{chamber}".format( - session="116", chamber="chamber_example" - ), - method="GET", - headers=headers, - ) - self.assert200(response, "Response body is : " + response.data.decode("utf-8")) - - def test_get_congress_search(self): - """Test case for get_congress_search - - Your GET endpoint - """ - query_string = [ - ("congress", "congress_example"), - ("chamber", "chamber_example"), - ("versions", "versions_example"), - ("text", "text_example"), - ("page", 1), - ("pageSize", 25), - ] - headers = { - "Accept": "application/json", - } - response = self.client.open( - "/congress/search", method="GET", headers=headers, query_string=query_string - ) - self.assert200(response, "Response body is : " + response.data.decode("utf-8")) - - def test_get_session_summary(self): - """Test case for get_session_summary - - Specific Session - """ - headers = { - "Accept": "application/json", - } - response = self.client.open( - "/congress/{session}".format(session="116"), method="GET", headers=headers - ) - self.assert200(response, "Response body is : " + response.data.decode("utf-8")) - - def test_get_sessions_summary(self): - """Test case for get_sessions_summary - - Congress Sessions - """ - headers = { - "Accept": "application/json", - } - response = self.client.open("/congress", method="GET", headers=headers) - self.assert200(response, "Response body is : " + response.data.decode("utf-8")) - - -if __name__ == "__main__": - unittest.main() diff --git a/backend/congress_api/test/controllers/test_uscode_controller.py b/backend/congress_api/test/controllers/test_uscode_controller.py deleted file mode 100644 index 96f51d5a..00000000 --- a/backend/congress_api/test/controllers/test_uscode_controller.py +++ /dev/null @@ -1,79 +0,0 @@ -# coding: utf-8 - -from __future__ import absolute_import - -import unittest - -from congress_api.test.controllers import BaseTestCase - - -class TestUscodeController(BaseTestCase): - """UscodeController integration test stubs""" - - def test_get_usc_release_sections(self): - """Test case for get_usc_release_sections - - Your GET endpoint - """ - query_string = [("page", 1), ("pageSize", 1000)] - headers = { - "Accept": "application/json", - } - response = self.client.open( - "/usc/{release_vers}/{short_title}/sections".format( - release_vers="release_vers_example", short_title="short_title_example" - ), - method="GET", - headers=headers, - query_string=query_string, - ) - self.assert200(response, "Response body is : " + response.data.decode("utf-8")) - - def test_get_usc_release_text(self): - """Test case for get_usc_release_text - - Your GET endpoint - """ - headers = { - "Accept": "application/json", - } - response = self.client.open( - "/usc/{release_vers}/{short_tile}/{section_number}/text".format( - release_vers="release_vers_example", - short_tile="short_tile_example", - section_number="section_number_example", - ), - method="GET", - headers=headers, - ) - self.assert200(response, "Response body is : " + response.data.decode("utf-8")) - - def test_get_usc_release_titles(self): - """Test case for get_usc_release_titles - - Your GET endpoint - """ - headers = { - "Accept": "application/json", - } - response = self.client.open( - "/usc/{release_vers}/titles".format(release_vers="release_vers_example"), - method="GET", - headers=headers, - ) - self.assert200(response, "Response body is : " + response.data.decode("utf-8")) - - def test_get_usc_releases(self): - """Test case for get_usc_releases - - Your GET endpoint - """ - headers = { - "Accept": "application/json", - } - response = self.client.open("/usc/releases", method="GET", headers=headers) - self.assert200(response, "Response body is : " + response.data.decode("utf-8")) - - -if __name__ == "__main__": - unittest.main() diff --git a/backend/congress_api/test/db/__init__.py b/backend/congress_api/test/db/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/backend/congress_api/test/db/test_chamber_queries.py b/backend/congress_api/test/db/test_chamber_queries.py deleted file mode 100644 index e8dd04ab..00000000 --- a/backend/congress_api/test/db/test_chamber_queries.py +++ /dev/null @@ -1,60 +0,0 @@ -from unittest import TestCase, mock, skipUnless - - -DB_MOCKED = True -if not DB_MOCKED: - from congress_api.db.chamber_queries import ( - get_chamber_summary_obj, - get_chamber_bills_list, - search_legislation, - ) - - -class TestGetChamberSummary(TestCase): - def test_raises_type_error_for_session(self): - with self.assertRaises(TypeError): - get_chamber_summary_obj("not int", "House") - - @skipUnless(DB_MOCKED, "Need to mock SQLAlchemy") - def test_returns_none_for_no_results(self): - pass - - @skipUnless(DB_MOCKED, "Need to mock SQLAlchemy") - def test_returns_counts_for_bills(self): - pass - - -class TestGetChamberBillList(TestCase): - def test_raises_type_error_for_session(self): - with self.assertRaises(TypeError): - get_chamber_bills_list("not int", "House", 25, 1) - - @skipUnless(DB_MOCKED, "Need to mock SQLAlchemy") - def test_computes_limits_and_offsets(self): - pass - - @skipUnless(DB_MOCKED, "Need to mock SQLAlchemy") - def test_returns_details_for_bills(self): - pass - - -class TestSearchLegislation(TestCase): - @skipUnless(DB_MOCKED, "Need to mock SQLAlchemy") - def test_searches_given_congress(self): - pass - - @skipUnless(DB_MOCKED, "Need to mock SQLAlchemy") - def test_searches_given_chambers_house(self): - pass - - @skipUnless(DB_MOCKED, "Need to mock SQLAlchemy") - def test_searches_given_chambers_senate(self): - pass - - @skipUnless(DB_MOCKED, "Need to mock SQLAlchemy") - def test_searches_given_chambers_both(self): - pass - - @skipUnless(DB_MOCKED, "Need to mock SQLAlchemy") - def test_searches_given_bill_statuses(self): - pass diff --git a/backend/congress_api/test/db/test_legislation_queries.py b/backend/congress_api/test/db/test_legislation_queries.py deleted file mode 100644 index e69de29b..00000000 diff --git a/backend/congress_api/test/db/test_session_queries.py b/backend/congress_api/test/db/test_session_queries.py deleted file mode 100644 index e69de29b..00000000 diff --git a/backend/congress_api/test/db/test_uscode_queries.py b/backend/congress_api/test/db/test_uscode_queries.py deleted file mode 100644 index e69de29b..00000000 diff --git a/backend/billparser/bioguide/__init__.py b/backend/congress_db/__init__.py similarity index 100% rename from backend/billparser/bioguide/__init__.py rename to backend/congress_db/__init__.py diff --git a/backend/billparser/db/models.py b/backend/congress_db/models.py similarity index 100% rename from backend/billparser/db/models.py rename to backend/congress_db/models.py diff --git a/backend/congress_db/session.py b/backend/congress_db/session.py new file mode 100644 index 00000000..6bf4178d --- /dev/null +++ b/backend/congress_db/session.py @@ -0,0 +1,37 @@ +import os + +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker, scoped_session +from sqlalchemy.pool import NullPool + +username = os.environ.get("db_user", "parser") +password = os.environ.get("db_pass", "parser") +table = os.environ.get("db_table", "us_code_2025") +db_host = os.environ.get("db_host", "0.0.0.0:5432") + +DATABASE_URI = f"postgresql://{username}:{password}@{db_host}/{table}" +engine = create_engine( + DATABASE_URI, poolclass=NullPool, connect_args={"sslmode": "disable"} +) + +Session = scoped_session(sessionmaker(bind=engine)) + +def init_session(): + """Initialize the Session object in the current process.""" + global Session + engine = create_engine( + DATABASE_URI, poolclass=NullPool, connect_args={"sslmode": "disable"} + ) + engine.dispose() + Session = scoped_session(sessionmaker(bind=engine)) + + +def get_scoped_session(): + """Initialize the Session object in the current process.""" + global Session + engine = create_engine( + DATABASE_URI, poolclass=NullPool, connect_args={"sslmode": "disable"} + ) + Session = scoped_session(sessionmaker(bind=engine)) + return Session() + diff --git a/backend/congress_fastapi/handlers/committees.py b/backend/congress_fastapi/handlers/committees.py index e261c1a4..a775aa3a 100644 --- a/backend/congress_fastapi/handlers/committees.py +++ b/backend/congress_fastapi/handlers/committees.py @@ -3,7 +3,7 @@ from sqlalchemy import select, and_, or_, func, asc, desc, insert, update, delete from sqlalchemy.orm import aliased -from billparser.db.models import ( +from congress_db.models import ( LegislationCommittee, LegislationChamber, Congress, diff --git a/backend/congress_fastapi/handlers/legislation/actions.py b/backend/congress_fastapi/handlers/legislation/actions.py index 23fc3b8f..a4987b24 100644 --- a/backend/congress_fastapi/handlers/legislation/actions.py +++ b/backend/congress_fastapi/handlers/legislation/actions.py @@ -4,7 +4,7 @@ from sqlalchemy import select, join, func, distinct, exists from sqlalchemy.orm import aliased from congress_fastapi.db.postgres import get_database -from billparser.db.models import ( +from congress_db.models import ( LegislationActionParse as LegislationActionParseModel, LegislationAction as LegislationActionModel, ) diff --git a/backend/congress_fastapi/handlers/legislation/content.py b/backend/congress_fastapi/handlers/legislation/content.py index 8bddf1e3..71b503a0 100644 --- a/backend/congress_fastapi/handlers/legislation/content.py +++ b/backend/congress_fastapi/handlers/legislation/content.py @@ -4,7 +4,7 @@ from sqlalchemy import select, join, func, distinct, exists from sqlalchemy.orm import aliased from congress_fastapi.db.postgres import get_database -from billparser.db.models import ( +from congress_db.models import ( LegislationContent as LegislationContentModel, ) from congress_fastapi.models.legislation.content import LegislationContent diff --git a/backend/congress_fastapi/handlers/legislation/policy_subject.py b/backend/congress_fastapi/handlers/legislation/policy_subject.py index 53508d16..8f775ec2 100644 --- a/backend/congress_fastapi/handlers/legislation/policy_subject.py +++ b/backend/congress_fastapi/handlers/legislation/policy_subject.py @@ -4,7 +4,7 @@ from sqlalchemy import select, join, func, distinct, exists from sqlalchemy.orm import aliased from congress_fastapi.db.postgres import get_database -from billparser.db.models import ( +from congress_db.models import ( LegislativePolicyArea as LegislativePolicyAreaModel, LegislativePolicyAreaAssociation as LegislativePolicyAreaAssociationModel, LegislativeSubject as LegislativeSubjectModel, diff --git a/backend/congress_fastapi/handlers/legislation/search.py b/backend/congress_fastapi/handlers/legislation/search.py index fdaeefd1..0614ca8a 100644 --- a/backend/congress_fastapi/handlers/legislation/search.py +++ b/backend/congress_fastapi/handlers/legislation/search.py @@ -19,7 +19,7 @@ import sqlalchemy from sqlalchemy.orm import aliased from congress_fastapi.db.postgres import get_database -from billparser.db.models import ( +from congress_db.models import ( Appropriation, Congress, Legislation, diff --git a/backend/congress_fastapi/handlers/legislation_metadata.py b/backend/congress_fastapi/handlers/legislation_metadata.py index 37bdb3f0..868a6844 100644 --- a/backend/congress_fastapi/handlers/legislation_metadata.py +++ b/backend/congress_fastapi/handlers/legislation_metadata.py @@ -10,7 +10,7 @@ ) from sqlalchemy import select -from billparser.db.models import ( +from congress_db.models import ( Appropriation as AppropriationModel, Legislation, LegislationSponsorship, diff --git a/backend/congress_fastapi/handlers/legislation_version.py b/backend/congress_fastapi/handlers/legislation_version.py index 3564fc0c..64d64038 100644 --- a/backend/congress_fastapi/handlers/legislation_version.py +++ b/backend/congress_fastapi/handlers/legislation_version.py @@ -3,13 +3,13 @@ from typing import List from collections import defaultdict -from billparser.prompt_runners.utils import run_query +from congress_parser.prompt_runners.utils import run_query from congress_fastapi.models.legislation.llm import LLMResponse from sqlalchemy import select, text, and_, or_ from sqlalchemy.orm import Session, load_only from congress_fastapi.db.postgres import get_database -from billparser.db.models import ( +from congress_db.models import ( LegislationContentTag, LegislationContent, LegislationContentSummary, diff --git a/backend/congress_fastapi/handlers/members.py b/backend/congress_fastapi/handlers/members.py index 984e9061..75dd1468 100644 --- a/backend/congress_fastapi/handlers/members.py +++ b/backend/congress_fastapi/handlers/members.py @@ -2,7 +2,7 @@ from sqlalchemy import select, and_, or_, func, asc, desc -from billparser.db.models import ( +from congress_db.models import ( Legislation, LegislationSponsorship, Legislator, diff --git a/backend/congress_fastapi/handlers/stats.py b/backend/congress_fastapi/handlers/stats.py index 46d8517c..e0adac32 100644 --- a/backend/congress_fastapi/handlers/stats.py +++ b/backend/congress_fastapi/handlers/stats.py @@ -8,7 +8,7 @@ from sqlalchemy import select, update, join, delete, func, literal from sqlalchemy.dialects.postgresql import insert from sqlalchemy.orm import aliased -from billparser.db.models import ( +from congress_db.models import ( LegislationVersion ) from congress_fastapi.db.postgres import get_database diff --git a/backend/congress_fastapi/handlers/uscode.py b/backend/congress_fastapi/handlers/uscode.py index 51826fb0..bb2fe969 100644 --- a/backend/congress_fastapi/handlers/uscode.py +++ b/backend/congress_fastapi/handlers/uscode.py @@ -7,7 +7,7 @@ from sqlalchemy import select, or_ from congress_fastapi.db.postgres import get_database -from billparser.db.models import ( +from congress_db.models import ( USCContent, USCChapter, USCSection, diff --git a/backend/congress_fastapi/handlers/user.py b/backend/congress_fastapi/handlers/user.py index 0783efc3..fe0395a0 100644 --- a/backend/congress_fastapi/handlers/user.py +++ b/backend/congress_fastapi/handlers/user.py @@ -9,7 +9,7 @@ from sqlalchemy.dialects.postgresql import insert from sqlalchemy.orm import aliased -from billparser.db.models import ( +from congress_db.models import ( USCContent, USCContentDiff, UserIdent, diff --git a/backend/congress_fastapi/models/committees.py b/backend/congress_fastapi/models/committees.py index f018c4bb..ee236fed 100644 --- a/backend/congress_fastapi/models/committees.py +++ b/backend/congress_fastapi/models/committees.py @@ -1,7 +1,7 @@ from typing import Annotated, Optional from datetime import datetime -from billparser.db.models import ( +from congress_db.models import ( LegislationCommittee, LegislationChamber, ) diff --git a/backend/congress_fastapi/models/legislation/actions.py b/backend/congress_fastapi/models/legislation/actions.py index 9e42e721..4158e861 100644 --- a/backend/congress_fastapi/models/legislation/actions.py +++ b/backend/congress_fastapi/models/legislation/actions.py @@ -2,7 +2,7 @@ from datetime import datetime -from billparser.db.models import ( +from congress_db.models import ( LegislationActionParse as LegislationActionParseModel, LegislationAction as LegislationActionModel, ) diff --git a/backend/congress_fastapi/models/legislation/content.py b/backend/congress_fastapi/models/legislation/content.py index 24073f72..e2ef0bf1 100644 --- a/backend/congress_fastapi/models/legislation/content.py +++ b/backend/congress_fastapi/models/legislation/content.py @@ -2,7 +2,7 @@ from datetime import datetime -from billparser.db.models import ( +from congress_db.models import ( LegislationContent as LegislationContentModel, ) from congress_fastapi.models.abstract import MappableBase diff --git a/backend/congress_fastapi/models/legislation/diff.py b/backend/congress_fastapi/models/legislation/diff.py index e779ebcc..40b5b8b4 100644 --- a/backend/congress_fastapi/models/legislation/diff.py +++ b/backend/congress_fastapi/models/legislation/diff.py @@ -2,7 +2,7 @@ from datetime import datetime -from billparser.db.models import ( +from congress_db.models import ( USCContentDiff as USCContentDiffModel, ) from congress_fastapi.models.abstract import MappableBase diff --git a/backend/congress_fastapi/models/legislation/metadata.py b/backend/congress_fastapi/models/legislation/metadata.py index 5bdd3f5c..f10dd1ca 100644 --- a/backend/congress_fastapi/models/legislation/metadata.py +++ b/backend/congress_fastapi/models/legislation/metadata.py @@ -4,7 +4,7 @@ from datetime import datetime import json -from billparser.db.models import ( +from congress_db.models import ( Legislation, LegislationVersion, LegislationVote, diff --git a/backend/congress_fastapi/models/legislation/search.py b/backend/congress_fastapi/models/legislation/search.py index a187bbeb..10fd51d2 100644 --- a/backend/congress_fastapi/models/legislation/search.py +++ b/backend/congress_fastapi/models/legislation/search.py @@ -1,5 +1,5 @@ from typing import List, Optional -from billparser.db.models import LegislationChamber, LegislationVersionEnum +from congress_db.models import LegislationChamber, LegislationVersionEnum from pydantic import BaseModel from datetime import date diff --git a/backend/congress_fastapi/models/members.py b/backend/congress_fastapi/models/members.py index 62bd065d..8774c6ca 100644 --- a/backend/congress_fastapi/models/members.py +++ b/backend/congress_fastapi/models/members.py @@ -2,7 +2,7 @@ from pydantic import BaseModel -from billparser.db.models import ( +from congress_db.models import ( Legislation, LegislationSponsorship, Legislator, diff --git a/backend/congress_fastapi/models/user.py b/backend/congress_fastapi/models/user.py index 3ac59848..f5e80954 100644 --- a/backend/congress_fastapi/models/user.py +++ b/backend/congress_fastapi/models/user.py @@ -2,7 +2,7 @@ from pydantic import BaseModel, Field from datetime import date -from billparser.db.models import ( +from congress_db.models import ( Legislation, Legislator, LegislationChamber, diff --git a/backend/congress_fastapi/routes/legislation.py b/backend/congress_fastapi/routes/legislation.py index 8cd20286..1df000de 100644 --- a/backend/congress_fastapi/routes/legislation.py +++ b/backend/congress_fastapi/routes/legislation.py @@ -1,7 +1,7 @@ from typing import List -from billparser.db.models import LegislationVersionEnum +from congress_db.models import LegislationVersionEnum -from billparser.prompt_runners.utils import ( +from congress_parser.prompt_runners.utils import ( get_legis_by_parent_and_id, get_usc_content_by_parent_and_id, print_clause, diff --git a/backend/congress_fastapi/routes/legislation_version.py b/backend/congress_fastapi/routes/legislation_version.py index dd113210..0673f59c 100644 --- a/backend/congress_fastapi/routes/legislation_version.py +++ b/backend/congress_fastapi/routes/legislation_version.py @@ -1,8 +1,8 @@ from typing import Dict, List from congress_fastapi.models.legislation.diff import BillDiffMetadataList -from billparser.db.models import UserIdent -from billparser.prompt_runners.utils import get_legis_by_parent_and_id, print_clause +from congress_db.models import UserIdent +from congress_parser.prompt_runners.utils import get_legis_by_parent_and_id, print_clause from congress_fastapi.handlers.user import get_llm_query_result, insert_llm_query_result from congress_fastapi.models.legislation.content import LegislationContent from congress_fastapi.handlers.legislation.actions import ( diff --git a/backend/congress_fastapi/routes/search.py b/backend/congress_fastapi/routes/search.py index c290129d..62f18a56 100644 --- a/backend/congress_fastapi/routes/search.py +++ b/backend/congress_fastapi/routes/search.py @@ -1,5 +1,5 @@ from typing import List -from billparser.db.models import LegislationChamber, LegislationVersionEnum +from congress_db.models import LegislationChamber, LegislationVersionEnum from fastapi import APIRouter, HTTPException, Query, status diff --git a/backend/congress_fastapi/routes/uscode.py b/backend/congress_fastapi/routes/uscode.py index 29954e7a..09fc6d2c 100644 --- a/backend/congress_fastapi/routes/uscode.py +++ b/backend/congress_fastapi/routes/uscode.py @@ -1,5 +1,5 @@ from typing import Any, Dict, List -from billparser.db.models import LegislationVersionEnum +from congress_db.models import LegislationVersionEnum from congress_fastapi.handlers.uscode import read_usc_content, search_chroma from congress_fastapi.models.uscode import USCodeSearchRequest, USCodeSearchResponse @@ -10,7 +10,7 @@ ) from congress_fastapi.models.errors import Error from congress_fastapi.models.legislation import LegislationMetadata -from billparser.prompt_runners.utils import ( +from congress_parser.prompt_runners.utils import ( get_usc_content_by_parent_and_id, print_clause, ) diff --git a/backend/congress_fastapi/routes/user.py b/backend/congress_fastapi/routes/user.py index 5575d921..ccfeaee0 100644 --- a/backend/congress_fastapi/routes/user.py +++ b/backend/congress_fastapi/routes/user.py @@ -1,6 +1,6 @@ import traceback from typing import List, Optional -from billparser.db.models import UserIdent +from congress_db.models import UserIdent from fastapi import ( APIRouter, HTTPException, diff --git a/backend/billparser/README.md b/backend/congress_parser/README.md similarity index 100% rename from backend/billparser/README.md rename to backend/congress_parser/README.md diff --git a/backend/congress_parser/__init__.py b/backend/congress_parser/__init__.py new file mode 100644 index 00000000..15db5df5 --- /dev/null +++ b/backend/congress_parser/__init__.py @@ -0,0 +1,3 @@ +from congress_parser.utils.logger import initialize_logger + +initialize_logger() \ No newline at end of file diff --git a/backend/billparser/actions/__init__.py b/backend/congress_parser/actions/__init__.py similarity index 97% rename from backend/billparser/actions/__init__.py rename to backend/congress_parser/actions/__init__.py index 0f800741..361efa4d 100644 --- a/backend/billparser/actions/__init__.py +++ b/backend/congress_parser/actions/__init__.py @@ -1,6 +1,6 @@ import re from typing import Dict, Optional, TypedDict -from billparser.logger import log +from congress_parser.logger import log from unidecode import unidecode from enum import Enum diff --git a/backend/billparser/actions/parser.py b/backend/congress_parser/actions/parser.py similarity index 98% rename from backend/billparser/actions/parser.py rename to backend/congress_parser/actions/parser.py index 33b92041..857818c4 100644 --- a/backend/billparser/actions/parser.py +++ b/backend/congress_parser/actions/parser.py @@ -1,16 +1,16 @@ -from billparser.utils.logger import LogContext -from billparser.actions import ActionObject, ActionType, determine_action -from billparser.actions.utils import strike_emulation +from congress_parser.utils.logger import LogContext +from congress_parser.actions import ActionObject, ActionType, determine_action +from congress_parser.actions.utils import strike_emulation from sqlalchemy.orm import Session from sqlalchemy.sql import Select from sqlalchemy import Table from sqlalchemy.dialects import postgresql -from billparser.utils.cite_parser import ( +from congress_parser.utils.cite_parser import ( CiteObject, parse_action_for_cite, parse_text_for_cite, ) -from billparser.db.handler import Session, get_scoped_session +from congress_db.session import Session, get_scoped_session from sqlalchemy import select, update from sqlalchemy.orm import aliased @@ -20,7 +20,7 @@ from collections import defaultdict from typing import Dict, List, Optional, Tuple -from billparser.db.models import ( +from congress_db.models import ( Legislation, LegislationContent, LegislationActionParse, diff --git a/backend/billparser/actions/redesignate.py b/backend/congress_parser/actions/redesignate.py similarity index 87% rename from backend/billparser/actions/redesignate.py rename to backend/congress_parser/actions/redesignate.py index 478ea00d..48bec331 100644 --- a/backend/billparser/actions/redesignate.py +++ b/backend/congress_parser/actions/redesignate.py @@ -1,8 +1,8 @@ -from billparser.db.handler import Session -from billparser.db.models import USCContentDiff, USCSection, USCContent -from billparser.logger import log +from congress_db.session import Session +from congress_db.models import USCContentDiff, USCSection, USCContent +from congress_parser.logger import log import re -from billparser.actions import ActionObject +from congress_parser.actions import ActionObject name_extract = re.compile(r"\((?P.+?)") diff --git a/backend/billparser/actions/utils.py b/backend/congress_parser/actions/utils.py similarity index 100% rename from backend/billparser/actions/utils.py rename to backend/congress_parser/actions/utils.py diff --git a/backend/billparser/appropriations/__init__.py b/backend/congress_parser/appropriations/__init__.py similarity index 97% rename from backend/billparser/appropriations/__init__.py rename to backend/congress_parser/appropriations/__init__.py index 2a533aa8..1d3ed21d 100644 --- a/backend/billparser/appropriations/__init__.py +++ b/backend/congress_parser/appropriations/__init__.py @@ -1,7 +1,7 @@ import spacy from spacy.matcher import Matcher -from billparser.db.models import Appropriation, LegislationContent +from congress_db.models import Appropriation, LegislationContent nlp = spacy.load("en_core_web_sm") matcher = Matcher(nlp.vocab) # to remain available until diff --git a/backend/billparser/appropriations/parser.py b/backend/congress_parser/appropriations/parser.py similarity index 96% rename from backend/billparser/appropriations/parser.py rename to backend/congress_parser/appropriations/parser.py index 2142e817..40a4ce0f 100644 --- a/backend/billparser/appropriations/parser.py +++ b/backend/congress_parser/appropriations/parser.py @@ -1,8 +1,8 @@ import json from typing import List -from billparser.appropriations import calculate_appropriation -from billparser.db.models import LegislationContent, Appropriation -from billparser.db.handler import Session +from congress_parser.appropriations import calculate_appropriation +from congress_db.models import LegislationContent, Appropriation +from congress_db.session import Session from litellm import completion query = """class Appropriation(TypedDict): diff --git a/backend/billparser/db/__init__.py b/backend/congress_parser/bioguide/__init__.py similarity index 100% rename from backend/billparser/db/__init__.py rename to backend/congress_parser/bioguide/__init__.py diff --git a/backend/billparser/bioguide/generator.py b/backend/congress_parser/bioguide/generator.py similarity index 100% rename from backend/billparser/bioguide/generator.py rename to backend/congress_parser/bioguide/generator.py diff --git a/backend/billparser/bioguide/manager.py b/backend/congress_parser/bioguide/manager.py similarity index 98% rename from backend/billparser/bioguide/manager.py rename to backend/congress_parser/bioguide/manager.py index 0ba2beaf..2d2c9022 100644 --- a/backend/billparser/bioguide/manager.py +++ b/backend/congress_parser/bioguide/manager.py @@ -6,9 +6,9 @@ import io import json -from billparser.db.handler import Session -from billparser.db.models import Legislator -from billparser.bioguide.types import BioGuideMember +from congress_db.session import Session +from congress_db.models import Legislator +from congress_parser.bioguide.types import BioGuideMember BULK_BIOGUIDE_URL = "https://bioguide.congress.gov/bioguide/data/BioguideProfiles.zip" SENATE_LIST_URL = "https://www.senate.gov/legislative/LIS_MEMBER/cvc_member_data.xml" diff --git a/backend/billparser/bioguide/types.py b/backend/congress_parser/bioguide/types.py similarity index 100% rename from backend/billparser/bioguide/types.py rename to backend/congress_parser/bioguide/types.py diff --git a/backend/billparser/compare.py b/backend/congress_parser/compare.py similarity index 100% rename from backend/billparser/compare.py rename to backend/congress_parser/compare.py diff --git a/backend/billparser/conftest.py b/backend/congress_parser/conftest.py similarity index 100% rename from backend/billparser/conftest.py rename to backend/congress_parser/conftest.py diff --git a/backend/billparser/downloader.py b/backend/congress_parser/downloader.py similarity index 100% rename from backend/billparser/downloader.py rename to backend/congress_parser/downloader.py diff --git a/backend/billparser/helpers.py b/backend/congress_parser/helpers.py similarity index 99% rename from backend/billparser/helpers.py rename to backend/congress_parser/helpers.py index beb66368..d3c322ed 100644 --- a/backend/billparser/helpers.py +++ b/backend/congress_parser/helpers.py @@ -1,7 +1,7 @@ from typing import Optional from lxml import etree import re -from billparser.translater import translate_paragraph +from congress_parser.translater import translate_paragraph from unidecode import unidecode diff --git a/backend/billparser/importers/__init__.py b/backend/congress_parser/importers/__init__.py similarity index 100% rename from backend/billparser/importers/__init__.py rename to backend/congress_parser/importers/__init__.py diff --git a/backend/billparser/importers/actions.py b/backend/congress_parser/importers/actions.py similarity index 91% rename from backend/billparser/importers/actions.py rename to backend/congress_parser/importers/actions.py index e5a71da4..a6c04a71 100644 --- a/backend/billparser/importers/actions.py +++ b/backend/congress_parser/importers/actions.py @@ -1,11 +1,11 @@ import os import time from typing import Dict, List -from billparser.db.models import LegislationActionParse, LegislationVersion +from congress_db.models import LegislationActionParse, LegislationVersion from joblib import Parallel, delayed from sqlalchemy import func -from billparser.actions.parser import parse_bill_for_actions -from billparser.db.handler import Session, init_session +from congress_parser.actions.parser import parse_bill_for_actions +from congress_db.session import Session, init_session THREADS = int(os.environ.get("PARSE_THREADS", -4)) diff --git a/backend/billparser/importers/bills.py b/backend/congress_parser/importers/bills.py similarity index 92% rename from backend/billparser/importers/bills.py rename to backend/congress_parser/importers/bills.py index 86db544c..aed713ba 100644 --- a/backend/billparser/importers/bills.py +++ b/backend/congress_parser/importers/bills.py @@ -2,12 +2,12 @@ import sys import json import zipfile -from billparser.appropriations.parser import parse_bill_for_appropriations -from billparser.db.models import LegislationVersion +from congress_parser.appropriations.parser import parse_bill_for_appropriations +from congress_db.models import LegislationVersion import requests import argparse from datetime import datetime -from billparser.run_through import parse_archives, ensure_congress +from congress_parser.run_through import parse_archives, ensure_congress webhook_url = os.environ.get("DISCORD_WEBHOOK", None) diff --git a/backend/billparser/importers/bioguide.py b/backend/congress_parser/importers/bioguide.py similarity index 68% rename from backend/billparser/importers/bioguide.py rename to backend/congress_parser/importers/bioguide.py index 19788a45..e0bd0088 100644 --- a/backend/billparser/importers/bioguide.py +++ b/backend/congress_parser/importers/bioguide.py @@ -1,4 +1,4 @@ -from billparser.bioguide.manager import BioGuideImporter +from congress_parser.bioguide.manager import BioGuideImporter if __name__ == "__main__": diff --git a/backend/billparser/importers/cleanup.py b/backend/congress_parser/importers/cleanup.py similarity index 96% rename from backend/billparser/importers/cleanup.py rename to backend/congress_parser/importers/cleanup.py index f55fe8e0..2fee30c4 100644 --- a/backend/billparser/importers/cleanup.py +++ b/backend/congress_parser/importers/cleanup.py @@ -2,8 +2,8 @@ from sqlalchemy.orm import aliased import os -from billparser.db.handler import engine, Session -from billparser.db.models import USCRelease +from congress_db.session import engine, Session +from congress_db.models import USCRelease webhook_url = os.environ.get("DISCORD_WEBHOOK", None) diff --git a/backend/billparser/importers/committees.py b/backend/congress_parser/importers/committees.py similarity index 97% rename from backend/billparser/importers/committees.py rename to backend/congress_parser/importers/committees.py index fd64fc62..3f198ac0 100644 --- a/backend/billparser/importers/committees.py +++ b/backend/congress_parser/importers/committees.py @@ -1,5 +1,5 @@ -from billparser.db.models import LegislationChamber, LegislationCommittee -from billparser.db.handler import Session +from congress_db.models import LegislationChamber, LegislationCommittee +from congress_db.session import Session import yaml import requests diff --git a/backend/billparser/importers/prompts.py b/backend/congress_parser/importers/prompts.py similarity index 86% rename from backend/billparser/importers/prompts.py rename to backend/congress_parser/importers/prompts.py index 1b48af52..454ba3d0 100644 --- a/backend/billparser/importers/prompts.py +++ b/backend/congress_parser/importers/prompts.py @@ -1,18 +1,18 @@ from collections import defaultdict import logging from typing import Dict, List -from billparser.prompt_runners.bill_tagger import bill_tagger -from billparser.utils.logger import LogContext +from congress_parser.prompt_runners.bill_tagger import bill_tagger +from congress_parser.utils.logger import LogContext from sqlalchemy import select, and_, not_ import litellm -from billparser.db.models import PromptBatch, LegislationVersion, Prompt -from billparser.db.handler import Session +from congress_db.models import PromptBatch, LegislationVersion, Prompt +from congress_db.session import Session -from billparser.prompt_runners.appropriation_finder import appropriation_finder -from billparser.prompt_runners.clause_tagger import clause_tagger -from billparser.prompt_runners.section_summarizer import section_summarizer +from congress_parser.prompt_runners.appropriation_finder import appropriation_finder +from congress_parser.prompt_runners.clause_tagger import clause_tagger +from congress_parser.prompt_runners.section_summarizer import section_summarizer def get_outstanding_legis_versions(prompt_ids: List[int]) -> List[int]: diff --git a/backend/billparser/db/handler.py b/backend/congress_parser/importers/releases.py similarity index 64% rename from backend/billparser/db/handler.py rename to backend/congress_parser/importers/releases.py index 3e62857a..931c145e 100644 --- a/backend/billparser/db/handler.py +++ b/backend/congress_parser/importers/releases.py @@ -1,57 +1,36 @@ -import os -import re +import argparse +import html import string -import time - -from lxml import etree +import os from unidecode import unidecode # GPLV2 -from sqlalchemy import create_engine -from sqlalchemy.orm import sessionmaker, scoped_session -from sqlalchemy.pool import NullPool +import zipfile +from datetime import datetime +from congress_parser.importers.bills import download_path +from joblib import Parallel, delayed +import requests +from sqlalchemy import func +from congress_db.session import import_title, get_number, Session +from congress_db.models import USCRelease, Version -from billparser.utils.citation import resolve_citations -from billparser.db.models import USCSection -from billparser.db.models import * - -username = os.environ.get("db_user", "bills") -password = os.environ.get("db_pass", "bills") -table = os.environ.get("db_table", "uscode") -db_host = os.environ.get("db_host", "localhost:5401") -DATABASE_URI = f"postgresql://{username}:{password}@{db_host}/{table}" -print(DATABASE_URI) -engine = create_engine( - DATABASE_URI, poolclass=NullPool, connect_args={"sslmode": "disable"} -) -# PromptsBase.metadata.create_all(engine) -# AppropriationsBase.metadata.create_all(engine) -# Base.metadata.create_all(engine) +THREADS = int(os.environ.get("PARSE_THREADS", -1)) +DOWNLOAD_BASE = "https://uscode.house.gov/download/{}" +RELEASE_POINTS = "https://uscode.house.gov/download/priorreleasepoints.htm" ribber = string.ascii_letters + string.digits -Session = scoped_session(sessionmaker(bind=engine)) - -def init_session(): - """Initialize the Session object in the current process.""" - global Session - engine = create_engine( - DATABASE_URI, poolclass=NullPool, connect_args={"sslmode": "disable"} +def main(): + parser = argparse.ArgumentParser(description="Process release points.") + parser.add_argument( + "--release-point", + type=str, + help="URL of the zip file to process a single release point", ) - engine.dispose() - Session = scoped_session(sessionmaker(bind=engine)) + args = parser.parse_args() - -def get_scoped_session(): - """Initialize the Session object in the current process.""" - global Session - engine = create_engine( - DATABASE_URI, poolclass=NullPool, connect_args={"sslmode": "disable"} - ) - Session = scoped_session(sessionmaker(bind=engine)) - return Session() - - -def unidecode_str(input_str: str) -> str: - return unidecode(input_str or "").replace("--", "-") + if args.release_point: + process_single_release_point(args.release_point) + else: + process_all_release_points() def open_usc(file_str): @@ -67,6 +46,9 @@ def open_usc(file_str): return lookup, ids +def unidecode_str(input_str: str) -> str: + return unidecode(input_str or "").replace("--", "-") + def get_number(ident: str) -> float: """ Converts a usc_ident into a number that is supposed to impart some implicit order @@ -278,3 +260,103 @@ def recursive_content(section_id, content_id, search_element, order): session.flush() recursive_content(sect_obj.usc_section_id, None, elem, 0) session.commit() + +def process_single_release_point(url, release=None): + zip_file_path = download_path(url) + with zipfile.ZipFile(zip_file_path) as zip_file: + if release is None: + session = Session() + new_version = Version(base_id=None) + session.add(new_version) + session.flush() + release = USCRelease( + short_title=zip_file_path.split("/")[-1].split(".")[0], + effective_date=datetime.now(), + long_title="", + version_id=new_version.version_id, + ) + session.add(release) + session.commit() + files = zip_file.namelist() + files = sorted( + files, key=lambda x: get_number(x.split(".")[0].replace("usc", "")) + ) + Parallel(n_jobs=THREADS, verbose=5, backend="loky")( + delayed(import_title)( + zip_file.open(file).read(), + file.split(".")[0].replace("usc", ""), + None, # Assuming title is not needed for single release point + release.to_dict(), # Assuming release_point.to_dict() is not needed for single release point + ) + for file in files + ) + + +def process_all_release_points(): + release_points = [] + response = requests.get(RELEASE_POINTS) + tree = html.fromstring(response.content) + + for year in range(2022, datetime.now().year, 2): + search_date = f"12/21/{year}" + links = tree.xpath(f'//a[contains(text(), "{search_date}")]/@href') + + if len(links) > 0: + link = links[0].replace("usc-rp", "xml_uscAll").replace(".htm", ".zip") + zipPath = DOWNLOAD_BASE.format(link) + match = re.search(r"@(\d+)-(\d+)\.zip", link) + + release_points.append( + { + "date": search_date, + "short_title": f"Public Law {match.group(1)}-{match.group(2)}", + "long_title": "", + "url": zipPath, + } + ) + + session = Session() + for rp in release_points: + existing_rp = ( + session.query(USCRelease) + .filter( + USCRelease.short_title == rp.get("short_title"), + func.date(USCRelease.effective_date) + == datetime.strptime(rp.get("date"), "%m/%d/%Y"), + ) + .all() + ) + if len(existing_rp) > 0: + print("Already in DB - Skipping") + continue + new_version = Version(base_id=None) + session.add(new_version) + session.commit() + release_point = USCRelease( + short_title=rp.get("short_title"), + effective_date=datetime.strptime(rp.get("date"), "%m/%d/%Y"), + long_title=rp.get("long_title"), + version_id=new_version.version_id, + ) + session.add(release_point) + session.commit() + zip_file_path = download_path(rp.get("url")) + with zipfile.ZipFile(f"usc/{zip_file_path}") as zip_file: + files = zip_file.namelist() + + files = sorted( + files, key=lambda x: get_number(x.split(".")[0].replace("usc", "")) + ) + Parallel(n_jobs=THREADS, verbose=5, backend="multiprocessing")( + delayed(import_title)( + zip_file.open(file).read(), + file.split(".")[0].replace("usc", ""), + rp.get("title"), + release_point.to_dict(), + ) + for file in files # if "09" in file + ) + + +if __name__ == "__main__": + main() diff --git a/backend/billparser/importers/sponsors.py b/backend/congress_parser/importers/sponsors.py similarity index 81% rename from backend/billparser/importers/sponsors.py rename to backend/congress_parser/importers/sponsors.py index caea9a61..95832053 100644 --- a/backend/billparser/importers/sponsors.py +++ b/backend/congress_parser/importers/sponsors.py @@ -1,8 +1,8 @@ from time import sleep -from billparser.db.handler import Session, init_session -from billparser.db.models import Legislation, LegislationSponsorship -from billparser.metadata.sponsors import extract_sponsors_from_api +from congress_db.session import Session, init_session +from congress_db.models import Legislation, LegislationSponsorship +from congress_parser.metadata.sponsors import extract_sponsors_from_api MIN_TIME_BETWEEN_REQUESTS = 3600 / 5000 diff --git a/backend/billparser/importers/statuses.py b/backend/congress_parser/importers/statuses.py similarity index 92% rename from backend/billparser/importers/statuses.py rename to backend/congress_parser/importers/statuses.py index d9ca9ac1..dc642cca 100644 --- a/backend/billparser/importers/statuses.py +++ b/backend/congress_parser/importers/statuses.py @@ -1,5 +1,5 @@ import os -from billparser.status_parser import parse_archive +from congress_parser.status_parser import parse_archive url_format = "https://www.govinfo.gov/bulkdata/BILLSTATUS/{congress}/{prefix}/BILLSTATUS-{congress}-{prefix}.zip" diff --git a/backend/billparser/importers/table3.py b/backend/congress_parser/importers/table3.py similarity index 97% rename from backend/billparser/importers/table3.py rename to backend/congress_parser/importers/table3.py index 39c597de..5abbb01b 100644 --- a/backend/billparser/importers/table3.py +++ b/backend/congress_parser/importers/table3.py @@ -8,8 +8,8 @@ from lxml import etree -from billparser.db.models import USCPopularName, USCActSection, USCRelease -from billparser.db.handler import Session +from congress_db.models import USCPopularName, USCActSection, USCRelease +from congress_db.session import Session popoular_name_url = "https://uscode.house.gov/popularnames/popularnames.htm" diff --git a/backend/billparser/importers/votes.py b/backend/congress_parser/importers/votes.py similarity index 96% rename from backend/billparser/importers/votes.py rename to backend/congress_parser/importers/votes.py index a3efb9fd..5c68cd61 100644 --- a/backend/billparser/importers/votes.py +++ b/backend/congress_parser/importers/votes.py @@ -8,9 +8,9 @@ import time import os -from billparser.db.handler import Session -from billparser.db.models import LegislationVote, LegislatorVote, LegislatorVoteType, Legislation, LegislationChamber, Legislator, Congress -from billparser.bioguide.manager import BioGuideImporter +from congress_db.session import Session +from congress_db.models import LegislationVote, LegislatorVote, LegislatorVoteType, Legislation, LegislationChamber, Legislator, Congress +from congress_parser.bioguide.manager import BioGuideImporter webhook_url = os.environ.get("DISCORD_WEBHOOK", None) diff --git a/backend/billparser/isort.cfg b/backend/congress_parser/isort.cfg similarity index 100% rename from backend/billparser/isort.cfg rename to backend/congress_parser/isort.cfg diff --git a/backend/billparser/logger.py b/backend/congress_parser/logger.py similarity index 100% rename from backend/billparser/logger.py rename to backend/congress_parser/logger.py diff --git a/backend/billparser/metadata/__init__.py b/backend/congress_parser/metadata/__init__.py similarity index 100% rename from backend/billparser/metadata/__init__.py rename to backend/congress_parser/metadata/__init__.py diff --git a/backend/billparser/metadata/sponsors.py b/backend/congress_parser/metadata/sponsors.py similarity index 97% rename from backend/billparser/metadata/sponsors.py rename to backend/congress_parser/metadata/sponsors.py index d8f808d0..b322f687 100644 --- a/backend/billparser/metadata/sponsors.py +++ b/backend/congress_parser/metadata/sponsors.py @@ -1,7 +1,7 @@ import logging import requests from typing import List, Dict -from billparser.db.models import LegislationSponsorship, Congress +from congress_db.models import LegislationSponsorship, Congress from os import environ CONGRESS_API_KEY = environ.get('CONGRESS_API_KEY') diff --git a/backend/congress_parser/nightly.py b/backend/congress_parser/nightly.py new file mode 100644 index 00000000..96d4e5eb --- /dev/null +++ b/backend/congress_parser/nightly.py @@ -0,0 +1,9 @@ +from congress_parser.downloader import download +from congress_parser.run_through import run_archives +from congress_parser.prune import run_prune + +# TODO: Put this back onto a cron job +if __name__ == "__main__": + download() + run_archives() + run_prune() diff --git a/backend/billparser/prompt_runners/__init__.py b/backend/congress_parser/prompt_runners/__init__.py similarity index 100% rename from backend/billparser/prompt_runners/__init__.py rename to backend/congress_parser/prompt_runners/__init__.py diff --git a/backend/billparser/prompt_runners/appropriation_finder.py b/backend/congress_parser/prompt_runners/appropriation_finder.py similarity index 96% rename from backend/billparser/prompt_runners/appropriation_finder.py rename to backend/congress_parser/prompt_runners/appropriation_finder.py index cad753c7..cca919f7 100644 --- a/backend/billparser/prompt_runners/appropriation_finder.py +++ b/backend/congress_parser/prompt_runners/appropriation_finder.py @@ -1,14 +1,14 @@ import logging from typing import List -from billparser.db.models import Appropriation -from billparser.db.handler import Session +from congress_db.models import Appropriation +from congress_db.session import Session import json -from billparser.utils.logger import LogContext +from congress_parser.utils.logger import LogContext import jsonschema from typing import List -from billparser.db.models import Appropriation, PromptBatch -from billparser.db.handler import Session -from billparser.prompt_runners.utils import get_existing_batch_or_content, run_query +from congress_db.models import Appropriation, PromptBatch +from congress_db.session import Session +from congress_parser.prompt_runners.utils import get_existing_batch_or_content, run_query from datetime import datetime SCHEMA = { diff --git a/backend/billparser/prompt_runners/bill_tagger.py b/backend/congress_parser/prompt_runners/bill_tagger.py similarity index 94% rename from backend/billparser/prompt_runners/bill_tagger.py rename to backend/congress_parser/prompt_runners/bill_tagger.py index 210bbc34..00a34d51 100644 --- a/backend/billparser/prompt_runners/bill_tagger.py +++ b/backend/congress_parser/prompt_runners/bill_tagger.py @@ -1,16 +1,16 @@ import logging from typing import List -from billparser.db.models import ( +from congress_db.models import ( LegislationVersionTag, PromptBatch, ) -from billparser.db.handler import Session +from congress_db.session import Session import json -from billparser.utils.logger import LogContext +from congress_parser.utils.logger import LogContext import jsonschema from datetime import datetime -from billparser.prompt_runners.utils import ( +from congress_parser.prompt_runners.utils import ( get_existing_batch_or_content, get_legis_by_parent_and_id, print_clause, diff --git a/backend/billparser/prompt_runners/clause_tagger.py b/backend/congress_parser/prompt_runners/clause_tagger.py similarity index 95% rename from backend/billparser/prompt_runners/clause_tagger.py rename to backend/congress_parser/prompt_runners/clause_tagger.py index 951855d1..d4498bfd 100644 --- a/backend/billparser/prompt_runners/clause_tagger.py +++ b/backend/congress_parser/prompt_runners/clause_tagger.py @@ -1,17 +1,17 @@ import logging from typing import List -from billparser.db.models import ( +from congress_db.models import ( LegislationContentTag, PromptBatch, ) -from billparser.db.handler import Session +from congress_db.session import Session import json -from billparser.utils.logger import LogContext +from congress_parser.utils.logger import LogContext import jsonschema from collections import defaultdict from datetime import datetime -from billparser.prompt_runners.utils import ( +from congress_parser.prompt_runners.utils import ( get_existing_batch_or_content, get_legis_by_parent_and_id, print_clause, diff --git a/backend/billparser/prompt_runners/section_summarizer.py b/backend/congress_parser/prompt_runners/section_summarizer.py similarity index 94% rename from backend/billparser/prompt_runners/section_summarizer.py rename to backend/congress_parser/prompt_runners/section_summarizer.py index c77b0d31..e3e824db 100644 --- a/backend/billparser/prompt_runners/section_summarizer.py +++ b/backend/congress_parser/prompt_runners/section_summarizer.py @@ -1,14 +1,14 @@ import logging from typing import List, Optional -from billparser.db.models import LegislationContent, PromptBatch -from billparser.db.handler import Session +from congress_db.models import LegislationContent, PromptBatch +from congress_db.session import Session import json -from billparser.utils.logger import LogContext +from congress_parser.utils.logger import LogContext import jsonschema from typing import List -from billparser.db.models import LegislationContentSummary -from billparser.db.handler import Session -from billparser.prompt_runners.utils import ( +from congress_db.models import LegislationContentSummary +from congress_db.session import Session +from congress_parser.prompt_runners.utils import ( get_existing_batch_or_content, get_legis_by_parent_and_id, print_clause, diff --git a/backend/billparser/prompt_runners/utils.py b/backend/congress_parser/prompt_runners/utils.py similarity index 98% rename from backend/billparser/prompt_runners/utils.py rename to backend/congress_parser/prompt_runners/utils.py index a82a77a1..3d441b11 100644 --- a/backend/billparser/prompt_runners/utils.py +++ b/backend/congress_parser/prompt_runners/utils.py @@ -2,7 +2,7 @@ import time import logging from typing import Dict, List, Optional, Tuple, Union -from billparser.db.models import LegislationContent, Prompt, PromptBatch, USCContent +from congress_db.models import LegislationContent, Prompt, PromptBatch, USCContent from litellm import completion import litellm import os diff --git a/backend/billparser/prune.py b/backend/congress_parser/prune.py similarity index 87% rename from backend/billparser/prune.py rename to backend/congress_parser/prune.py index 08c71148..f604b46c 100644 --- a/backend/billparser/prune.py +++ b/backend/congress_parser/prune.py @@ -1,5 +1,5 @@ -from billparser.db.models import Bill, BillVersion, Version, ContentDiff -from billparser.db.handler import Session +from congress_db.models import Bill, BillVersion, Version, ContentDiff +from congress_db.session import Session def run_prune(): diff --git a/backend/billparser/run_through.py b/backend/congress_parser/run_through.py similarity index 98% rename from backend/billparser/run_through.py rename to backend/congress_parser/run_through.py index d8b3be51..d9744370 100644 --- a/backend/billparser/run_through.py +++ b/backend/congress_parser/run_through.py @@ -19,11 +19,11 @@ from sqlalchemy import desc -from billparser.actions import ActionObject -from billparser.actions import determine_action as determine_action2 -from billparser.actions.redesignate import redesignate +from congress_parser.actions import ActionObject +from congress_parser.actions import determine_action as determine_action2 +from congress_parser.actions.redesignate import redesignate -from billparser.db.models import ( +from congress_db.models import ( Legislation, LegislationContent, LegislationVersion, @@ -37,15 +37,15 @@ USCRelease, Congress, ) -from billparser.metadata.sponsors import ( +from congress_parser.metadata.sponsors import ( extract_sponsors_from_form, extract_sponsors_from_api, ) -from billparser.utils.logger import LogContext -from billparser.utils.cite_parser import parse_action_for_cite, ActionObject -from billparser.db.handler import Session, init_session -from billparser.translater import translate_paragraph +from congress_parser.utils.logger import LogContext +from congress_parser.utils.cite_parser import parse_action_for_cite, ActionObject +from congress_db.session import Session, init_session +from congress_parser.translater import translate_paragraph from joblib import Parallel, delayed from typing import Any, Dict, List, Optional diff --git a/backend/billparser/status_parser.py b/backend/congress_parser/status_parser.py similarity index 99% rename from backend/billparser/status_parser.py rename to backend/congress_parser/status_parser.py index c01cfd5d..6f812af8 100644 --- a/backend/billparser/status_parser.py +++ b/backend/congress_parser/status_parser.py @@ -5,7 +5,7 @@ from dateutil.parser import parse -from billparser.db.models import ( +from congress_db.models import ( LegislationAction, LegislationCommittee, Legislation, @@ -17,7 +17,7 @@ LegislativeSubject, LegislativeSubjectAssociation, ) -from billparser.db.handler import Session +from congress_db.session import Session def _ensure_committee_link(committee_id, legislation_id, referred_date, discharge_date): diff --git a/backend/billparser/tests/__init__.py b/backend/congress_parser/tests/__init__.py similarity index 100% rename from backend/billparser/tests/__init__.py rename to backend/congress_parser/tests/__init__.py diff --git a/backend/billparser/tests/test_actions.py b/backend/congress_parser/tests/test_actions.py similarity index 99% rename from backend/billparser/tests/test_actions.py rename to backend/congress_parser/tests/test_actions.py index 5de96251..7caebfd2 100644 --- a/backend/billparser/tests/test_actions.py +++ b/backend/congress_parser/tests/test_actions.py @@ -1,5 +1,5 @@ from unittest import TestCase, skip -from billparser.actions import determine_action, ActionType +from congress_parser.actions import determine_action, ActionType class TestDetermineAction(TestCase): diff --git a/backend/billparser/tests/test_cite_parser.py b/backend/congress_parser/tests/test_cite_parser.py similarity index 98% rename from backend/billparser/tests/test_cite_parser.py rename to backend/congress_parser/tests/test_cite_parser.py index 8796af16..6ffc9f5e 100644 --- a/backend/billparser/tests/test_cite_parser.py +++ b/backend/congress_parser/tests/test_cite_parser.py @@ -1,6 +1,6 @@ from unittest import TestCase from lxml import etree -from billparser.utils.cite_parser import ( +from congress_parser.utils.cite_parser import ( parse_action_for_cite, parse_text_for_cite, extract_usc_cite, diff --git a/backend/billparser/tests/test_parser.py b/backend/congress_parser/tests/test_parser.py similarity index 97% rename from backend/billparser/tests/test_parser.py rename to backend/congress_parser/tests/test_parser.py index 2b6f3d9d..f5809805 100644 --- a/backend/billparser/tests/test_parser.py +++ b/backend/congress_parser/tests/test_parser.py @@ -1,5 +1,5 @@ from unittest import TestCase -from billparser.actions.utils import strike_emulation +from congress_parser.actions.utils import strike_emulation class TestStrikeEmulation(TestCase): diff --git a/backend/billparser/translater.py b/backend/congress_parser/translater.py similarity index 100% rename from backend/billparser/translater.py rename to backend/congress_parser/translater.py diff --git a/backend/billparser/utils/__init__.py b/backend/congress_parser/utils/__init__.py similarity index 100% rename from backend/billparser/utils/__init__.py rename to backend/congress_parser/utils/__init__.py diff --git a/backend/billparser/utils/citation.py b/backend/congress_parser/utils/citation.py similarity index 100% rename from backend/billparser/utils/citation.py rename to backend/congress_parser/utils/citation.py diff --git a/backend/billparser/utils/cite_parser.py b/backend/congress_parser/utils/cite_parser.py similarity index 99% rename from backend/billparser/utils/cite_parser.py rename to backend/congress_parser/utils/cite_parser.py index 264374f0..6db6d257 100644 --- a/backend/billparser/utils/cite_parser.py +++ b/backend/congress_parser/utils/cite_parser.py @@ -2,7 +2,7 @@ import re import logging -from billparser.actions import Action, ActionType +from congress_parser.actions import Action, ActionType from unidecode import unidecode cite_contexts = {"last_title": None} diff --git a/backend/billparser/utils/logger.py b/backend/congress_parser/utils/logger.py similarity index 100% rename from backend/billparser/utils/logger.py rename to backend/congress_parser/utils/logger.py diff --git a/backend/pytest.ini b/backend/pytest.ini index 0cfa30ad..36f6ade0 100644 --- a/backend/pytest.ini +++ b/backend/pytest.ini @@ -1,3 +1,2 @@ [pytest] -pythonpath = billparser -addopts = --ignore=billparser/tests/test_routes.py \ No newline at end of file +pythonpath = congress_parser \ No newline at end of file diff --git a/backend/requirements-fastapi.txt b/backend/requirements-fastapi.txt deleted file mode 100644 index 29471931..00000000 --- a/backend/requirements-fastapi.txt +++ /dev/null @@ -1,22 +0,0 @@ -# Python tools -python-json-logger~=2.0.4 -requests==2.32.2 - -# FastAPI tools -fastapi~=0.108.0 -uvicorn[standard] -slowapi~=0.1.9 - -# SQLAlchemy tools -databases[postgresql]~=0.8.0 -psycopg2-binary~=2.8.4 -pyhumps~=3.8.0 # camelCase <-> snake_case -SQLAlchemy~=1.4.0 - -en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl -spacy==3.7.2 -spacy-legacy==3.0.12 -spacy-loggers==1.0.5 -litellm==1.35.5 -tokenizers==0.15.2 -chromadb~=0.6.3 \ No newline at end of file diff --git a/backend/requirements-test.txt b/backend/requirements-test.txt deleted file mode 100644 index 55b033e9..00000000 --- a/backend/requirements-test.txt +++ /dev/null @@ -1 +0,0 @@ -pytest \ No newline at end of file diff --git a/backend/requirements.txt b/backend/requirements.txt index 1eedf2f2..1f456e5a 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,17 +1,16 @@ # Python tools -cachetools==2.1.0 +pytest +cachetools==6.2.4 green joblib==1.2.0 python_dateutil >= 2.6.0 python-json-logger~=2.0.4 setuptools >= 21.0.0 Unidecode==1.1.1 +requests==2.32.5 # Flask tools -connexion >= 2.5.0; python_version>="3.6" -connexion >= 2.3.0; python_version=="3.5" -connexion >= 2.3.0; python_version=="3.4" -connexion == 2.4.0; python_version<="2.7" +connexion >= 2.5.0 Flask==2.0.2 Flask-Cors==3.0.10 Flask-Compress==1.10.1 @@ -21,18 +20,30 @@ jinja2~=3.1.3 swagger-ui-bundle >= 0.0.2 werkzeug<=2.0.0 -# SQLAlchemy tools -alembic==1.14.0 -pydantic~=2.5.3 -psycopg2-binary~=2.8.4 -SQLAlchemy~=1.4.0 +# FastAPI tools +fastapi~=0.108.0 +uvicorn[standard] +slowapi~=0.1.9 + +# Database tools +alembic==1.16.5 +pydantic~=2.12.5 +psycopg2-binary~=2.9.11 +SQLAlchemy~=1.4.54 +databases[postgresql]~=0.8.0 +pyhumps~=3.8.0 # camelCase <-> snake_case +chromadb~=0.6.3 # Parsing tools -lxml==4.9.1 +lxml==6.0.2 +genson==1.3.0 +jsonschema==4.25.1 +pandas==2.3.3 +PyYAML==6.0.3 # LLM tools -en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl -spacy==3.7.2 +en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl +spacy==3.8.11 spacy-legacy==3.0.12 spacy-loggers==1.0.5 litellm==1.35.5 diff --git a/backend/rp.json b/backend/rp.json deleted file mode 100644 index 456c1ad9..00000000 --- a/backend/rp.json +++ /dev/null @@ -1,14 +0,0 @@ -[ - { - "date": "12/21/2022", - "short_title": "Public Law 117-262", - "long_title": "", - "url": "https://uscode.house.gov/download/releasepoints/us/pl/117/262/xml_uscAll@117-262.zip" - }, - { - "date": "12/21/2024", - "short_title": "Public Law 118-158", - "long_title": "", - "url": "https://uscode.house.gov/download/releasepoints/us/pl/118/158/xml_uscAll@118-158.zip" - } -] \ No newline at end of file diff --git a/backend/setup.py b/backend/setup.py index 368dde20..cbb61379 100644 --- a/backend/setup.py +++ b/backend/setup.py @@ -1,24 +1,19 @@ from setuptools import setup, find_packages setup( - name='billparser', - version='0.8.8', - author='Bradley', + name='congress', + version='1.0.0', + author='Congress.Dev', author_email='mustyoshi@gmail.com', - description='Congress.dev bill parser', + description='Congress.Dev Packages', long_description=open('README.md').read(), long_description_content_type='text/markdown', - url='https://github.com/yourusername/billparser', - packages=find_packages(exclude=['tests'], include=["billparser"]), + url='https://github.com/Congress-Dev/congress-dev', + packages=find_packages(include=["congress_parser", "congress_db", "congress_api", "congress_fastapi"]), + license='MIT', classifiers=[ - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', ], - install_requires=[ - # list of your library dependencies - ], + install_requires=[], ) \ No newline at end of file diff --git a/codeql-custom-queries-python/codeql-pack.lock.yml b/codeql-custom-queries-python/codeql-pack.lock.yml deleted file mode 100644 index 6c6eccfa..00000000 --- a/codeql-custom-queries-python/codeql-pack.lock.yml +++ /dev/null @@ -1,26 +0,0 @@ ---- -lockVersion: 1.0.0 -dependencies: - codeql/dataflow: - version: 1.1.9 - codeql/mad: - version: 1.0.15 - codeql/python-all: - version: 3.1.1 - codeql/regex: - version: 1.0.15 - codeql/ssa: - version: 1.0.15 - codeql/threat-models: - version: 1.0.15 - codeql/tutorial: - version: 1.0.15 - codeql/typetracking: - version: 1.0.15 - codeql/util: - version: 2.0.2 - codeql/xml: - version: 1.0.15 - codeql/yaml: - version: 1.0.15 -compiled: false diff --git a/codeql-custom-queries-python/codeql-pack.yml b/codeql-custom-queries-python/codeql-pack.yml deleted file mode 100644 index d5a7fe90..00000000 --- a/codeql-custom-queries-python/codeql-pack.yml +++ /dev/null @@ -1,7 +0,0 @@ ---- -library: false -warnOnImplicitThis: false -name: getting-started/codeql-extra-queries-python -version: 1.0.0 -dependencies: - codeql/python-all: ^3.1.1 diff --git a/codeql-custom-queries-python/example.ql b/codeql-custom-queries-python/example.ql deleted file mode 100644 index e12c30a6..00000000 --- a/codeql-custom-queries-python/example.ql +++ /dev/null @@ -1,13 +0,0 @@ -/** - * This is an automatically generated file - * @name Hello world - * @kind problem - * @problem.severity warning - * @id python/example/hello-world - */ - -import python - -from Import imp -where imp.getAnImportedModuleName() = "zipfile" -select imp, imp.getLocation(), "File importing zipfile", "wut" \ No newline at end of file diff --git a/frontend/.gitignore b/frontend/.gitignore index 4d29575d..24cdedf8 100644 --- a/frontend/.gitignore +++ b/frontend/.gitignore @@ -20,4 +20,4 @@ npm-debug.log* yarn-debug.log* -yarn-error.log* +yarn-error.log* \ No newline at end of file diff --git a/frontend/src/common/enums.js b/frontend/src/common/enums.js index 1a45f398..459da25f 100644 --- a/frontend/src/common/enums.js +++ b/frontend/src/common/enums.js @@ -1,4 +1,4 @@ -// Source: backend/billparser/actions/__init__.py +// Source: backend/congress_parser/actions/__init__.py export const VALID_ACTIONS = [ "SHORT-TITLE", "PURPOSE", diff --git a/hillstack/.dockerignore b/hillstack/.dockerignore new file mode 100644 index 00000000..40b878db --- /dev/null +++ b/hillstack/.dockerignore @@ -0,0 +1 @@ +node_modules/ \ No newline at end of file diff --git a/hillstack/.env.example b/hillstack/.env.example index 5af835be..4553f4b2 100644 --- a/hillstack/.env.example +++ b/hillstack/.env.example @@ -1,15 +1,10 @@ -# Since the ".env" file is gitignored, you can use the ".env.example" file to -# build a new ".env" file when you clone the repo. Keep this file up-to-date -# when you add new variables to `.env`. - -# This file will be committed to version control, so make sure not to have any -# secrets in it. If you are cloning this repo, create a copy of this file named -# ".env" and populate it with your secrets. - # When adding additional environment variables, the schema in "/src/env.js" # should be updated accordingly. -# Prisma -# https://www.prisma.io/docs/reference/database-reference/connection-urls#env -DATABASE_URL="postgresql://postgres:password@localhost:5432/database" -NEXT_PUBLIC_APP_URL="https://youraddress.com" \ No newline at end of file +DATABASE_URL="postgresql://parser:parser@localhost:5432/us_code_2025" +NODE_ENV="development" +NEXT_PUBLIC_APP_URL="http://localhost:3001" +NEXTAUTH_URL="http://localhost:3001" +AUTH_SECRET="" +GOOGLE_CLIENT_ID="" +GOOGLE_CLIENT_SECRET="" \ No newline at end of file diff --git a/hillstack/.nvmrc b/hillstack/.nvmrc new file mode 100644 index 00000000..1e4f3920 --- /dev/null +++ b/hillstack/.nvmrc @@ -0,0 +1 @@ +24.12.0 \ No newline at end of file diff --git a/hillstack/README.md b/hillstack/README.md deleted file mode 100644 index 67943c7f..00000000 --- a/hillstack/README.md +++ /dev/null @@ -1,29 +0,0 @@ -# Create T3 App - -This is a [T3 Stack](https://create.t3.gg/) project bootstrapped with `create-t3-app`. - -## What's next? How do I make an app with this? - -We try to keep this project as simple as possible, so you can start with just the scaffolding we set up for you, and add additional things later when they become necessary. - -If you are not familiar with the different technologies used in this project, please refer to the respective docs. If you still are in the wind, please join our [Discord](https://t3.gg/discord) and ask for help. - -- [Next.js](https://nextjs.org) -- [NextAuth.js](https://next-auth.js.org) -- [Prisma](https://prisma.io) -- [Drizzle](https://orm.drizzle.team) -- [Tailwind CSS](https://tailwindcss.com) -- [tRPC](https://trpc.io) - -## Learn More - -To learn more about the [T3 Stack](https://create.t3.gg/), take a look at the following resources: - -- [Documentation](https://create.t3.gg/) -- [Learn the T3 Stack](https://create.t3.gg/en/faq#what-learning-resources-are-currently-available) — Check out these awesome tutorials - -You can check out the [create-t3-app GitHub repository](https://github.com/t3-oss/create-t3-app) — your feedback and contributions are welcome! - -## How do I deploy this? - -Follow our deployment guides for [Vercel](https://create.t3.gg/en/deployment/vercel), [Netlify](https://create.t3.gg/en/deployment/netlify) and [Docker](https://create.t3.gg/en/deployment/docker) for more information. diff --git a/hillstack/src/app/congress/bills/[billId]/overview/follow.tsx b/hillstack/src/app/congress/bills/[billId]/overview/follow.tsx index 5549a1b9..a5eb0b56 100644 --- a/hillstack/src/app/congress/bills/[billId]/overview/follow.tsx +++ b/hillstack/src/app/congress/bills/[billId]/overview/follow.tsx @@ -43,7 +43,11 @@ export function LegislationFollow({ sx={{ width: '100%', mb: 2 }} variant={following ? 'contained' : 'outlined'} > - {!session ? 'Login to Follow' : following ? 'Unfollow' : 'Follow'} + {!session + ? 'Login to Follow' + : following + ? 'Unfollow' + : 'Follow'} diff --git a/hillstack/src/app/congress/bills/[billId]/overview/page.tsx b/hillstack/src/app/congress/bills/[billId]/overview/page.tsx index 8d1ec089..84b8324a 100644 --- a/hillstack/src/app/congress/bills/[billId]/overview/page.tsx +++ b/hillstack/src/app/congress/bills/[billId]/overview/page.tsx @@ -50,7 +50,7 @@ export default async function BillOverviewPage({ > {data?.legislation_version?.map((version) => (