-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
114 lines (95 loc) · 3.46 KB
/
utils.py
File metadata and controls
114 lines (95 loc) · 3.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import base64
import streamlit as st
import boto3
import json
import pandas as pd
from urllib.parse import urlparse
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
import os
# Load variables from a .env file (if one is found) into the process
# environment so the os.getenv lookups in this module can see them.
load_dotenv()
# AWS access key ID taken from the environment; None when the variable is unset.
aws_key = os.getenv("AWS_ACCESS_KEY_ID")
# AWS Secrets (for production use)
def get_secret(secret_name: str, region_name: str = "ap-south-1") -> dict:
    """Fetch a secret from AWS Secrets Manager and parse it as JSON.

    Args:
        secret_name: Identifier passed as ``SecretId`` to ``get_secret_value``.
        region_name: AWS region used to create the Secrets Manager client.

    Returns:
        The JSON-decoded secret payload. If anything fails (client creation,
        the API call, or decoding), the error is shown with ``st.error`` and
        an empty dict is returned instead of raising.
    """
    try:
        # Create the client inside the try block so that configuration
        # problems are reported the same way as request failures instead of
        # escaping the function's "return {} on failure" contract.
        client = boto3.client("secretsmanager", region_name=region_name)
        response = client.get_secret_value(SecretId=secret_name)

        if "SecretString" in response:
            payload = response["SecretString"]
        else:
            # No string form present: decode the binary form to UTF-8 text.
            payload = base64.b64decode(response["SecretBinary"]).decode("utf-8")
        return json.loads(payload)
    except Exception as e:
        # Deliberate best-effort: surface the problem in the UI and let the
        # caller continue with an empty configuration.
        st.error(f"❌ Failed to load secret: {e}")
        return {}
# DynamoDB connection
@st.cache_resource
def init_dynamodb():
    """Create the boto3 DynamoDB resource for the ``ap-south-1`` region.

    Credentials come from the ``aws`` section of Streamlit secrets when it is
    available; otherwise they are read from the ``AWS_ACCESS_KEY_ID`` and
    ``AWS_SECRET_ACCESS_KEY`` environment variables (after loading ``.env``).

    Returns:
        The ``boto3`` DynamoDB resource, or ``None`` if configuration fails
        (the error is shown with ``st.error``).
    """
    try:
        config = {
            'region_name': 'ap-south-1',
        }

        # Probing st.secrets can itself raise when no secrets file exists
        # (the exception type has varied between Streamlit releases). Treat
        # any failure here as "no Streamlit secrets" so the environment
        # variable fallback below is actually reachable in local development.
        try:
            aws_secrets = st.secrets["aws"] if "aws" in st.secrets else None
        except Exception:
            aws_secrets = None

        if aws_secrets is not None:
            # Streamlit secrets (cloud deployment). A missing key inside the
            # section is a real configuration error and is reported below.
            config.update({
                'aws_access_key_id': aws_secrets["AWS_ACCESS_KEY_ID"],
                'aws_secret_access_key': aws_secrets["AWS_SECRET_ACCESS_KEY"],
            })
        else:
            # Environment variables (local development).
            load_dotenv()
            config.update({
                'aws_access_key_id': os.getenv("AWS_ACCESS_KEY_ID"),
                'aws_secret_access_key': os.getenv("AWS_SECRET_ACCESS_KEY"),
            })

        return boto3.resource('dynamodb', **config)
    except Exception as e:
        st.error(f"❌ AWS Configuration Error: {str(e)}")
        return None
# platform detection from domain
def get_platform_from_url(url):
    """Return a display name for the platform that hosts ``url``.

    The host is lower-cased, any port/credentials are ignored, and a leading
    ``www.`` is dropped. Known platforms match on the host itself or on any
    of its subdomains (e.g. ``name.substack.com`` -> ``'Substack'``). Input
    without a scheme (``github.com/user``) is accepted as well.

    Args:
        url: The URL to classify.

    Returns:
        The mapped platform name; otherwise the first label of the host in
        title case (``example.org`` -> ``'Example'``); or ``'Unknown'`` when
        no host can be determined.
    """
    platform_map = {
        'linkedin.com': 'LinkedIn',
        'x.com': 'X (Twitter)',
        'twitter.com': 'X (Twitter)',
        'github.com': 'GitHub',
        'medium.com': 'Medium',
        'arxiv.org': 'arXiv',
        'hai.stanford.edu': 'HAI Stanford',
        'nextgenai.simppl.org': 'NextGenAI',
        'youtube.com': 'YouTube',
        'substack.com': 'Substack',
    }

    if not isinstance(url, str):
        return 'Unknown'

    cleaned = url.strip().lower()
    try:
        parsed = urlparse(cleaned)
        if not parsed.scheme and not parsed.netloc:
            # Without a scheme urlparse puts the host into ``path``; a
            # leading "//" makes it parse as a network location instead.
            parsed = urlparse('//' + cleaned)
        # ``hostname`` excludes any port and user-info part of the netloc.
        host = parsed.hostname or ''
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. bad IPv6 brackets).
        return 'Unknown'

    # Strip "www." only as a prefix; a plain replace() would also mangle
    # hosts that merely contain that substring.
    if host.startswith('www.'):
        host = host[len('www.'):]
    if not host:
        return 'Unknown'

    # Exact host or any subdomain of a known platform.
    for known_domain, platform in platform_map.items():
        if host == known_domain or host.endswith('.' + known_domain):
            return platform

    # Unknown site: fall back to the first label of the host.
    return host.split('.')[0].title()
# Topic extractor via page title / OpenGraph
@st.cache_data(show_spinner=False)
def extract_title_from_url(url):
    """Fetch ``url`` and return the best available title for the page.

    Candidates are tried in order: the OpenGraph ``og:title`` meta tag, the
    ``<title>`` element, then the first ``<h1>``. A candidate that is missing
    or blank after stripping whitespace is skipped.

    Args:
        url: Address of the page to fetch (5 second timeout).

    Returns:
        The stripped title text, or ``'Unclassified'`` when no candidate has
        text or when fetching/parsing fails.
    """
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        resp = requests.get(url, timeout=5, headers=headers)
        soup = BeautifulSoup(resp.text, 'html.parser')

        # OpenGraph <meta property="og:title" content="...">
        og_title = soup.find('meta', property='og:title')
        if og_title:
            text = (og_title.get('content') or '').strip()
            if text:
                return text

        # <title> tag; get_text() also handles a title with nested markup,
        # for which ``.string`` would be None.
        if soup.title:
            text = soup.title.get_text().strip()
            if text:
                return text

        # First <h1> tag
        h1 = soup.find('h1')
        if h1:
            text = h1.get_text().strip()
            if text:
                return text

        return 'Unclassified'
    except Exception:
        # Best-effort lookup: any network or parsing failure yields the
        # placeholder. ``Exception`` (not a bare except) keeps
        # KeyboardInterrupt and SystemExit propagating.
        return 'Unclassified'