-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathparser_web.py
More file actions
35 lines (29 loc) · 952 Bytes
/
parser_web.py
File metadata and controls
35 lines (29 loc) · 952 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import pandas as pd
import requests
from bs4 import BeautifulSoup
URL = "https://zenodo.org/record/4743386/preview/NoPTM-2_Mix_CHARMM36m_0.1x3mks.zip"
OUTPUT_CSV = "files_zip1.csv"
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests.get() can block forever

# Decimal multipliers for the unit suffixes found after the numeric size.
# Any suffix not listed here (or no suffix at all) is treated as plain bytes.
SIZE_UNITS = {
    "kB": 10**3,
    "KB": 10**3,
    "MB": 10**6,
    "GB": 10**9,
    "TB": 10**12,
}

CSV_COLUMNS = ["filename", "extension", "size"]


def size_to_bytes(text):
    """Convert a human-readable size such as ``"1.5 GB"`` to a byte count.

    The text is split on whitespace: the first token is parsed as a float and
    the second token, when present, selects a multiplier from ``SIZE_UNITS``.
    An unknown or missing unit leaves the number unscaled.

    Args:
        text: Size string of the form ``"<number> <unit>"``.

    Returns:
        The size in bytes as a float.

    Raises:
        ValueError: If ``text`` is blank or its first token is not a number.
    """
    tokens = text.split()
    if not tokens:
        raise ValueError(f"empty size string: {text!r}")
    unit = tokens[1] if len(tokens) > 1 else ""
    return float(tokens[0]) * SIZE_UNITS.get(unit, 1)


def file_extension(filename):
    """Return the extension of ``filename`` without the leading dot.

    Only the last path component is inspected, so dots in directory names are
    ignored. Names without a dot, and dot-files such as ``".bashrc"``, yield
    an empty string.

    Args:
        filename: File name or ``/``-separated path.

    Returns:
        The text after the final dot of the last path component, or ``""``.
    """
    basename = filename.strip().rstrip("/").rpartition("/")[2]
    stem, dot, extension = basename.rpartition(".")
    return extension if dot and stem else ""


def build_file_records(texts):
    """Turn alternating ``[name, size, name, size, ...]`` texts into records.

    Args:
        texts: Sequence of strings in which every even index holds a file name
            and the following odd index holds that file's size text.

    Returns:
        A list of dicts with the keys ``"filename"``, ``"extension"`` and
        ``"size"`` (size in bytes).

    Raises:
        ValueError: If ``texts`` has an odd number of items, i.e. some file
            name has no matching size, or if a size cannot be parsed.
    """
    if len(texts) % 2:
        raise ValueError(
            f"expected filename/size pairs, got {len(texts)} items"
        )
    return [
        {
            "filename": name,
            "extension": file_extension(name),
            "size": size_to_bytes(size_text),
        }
        for name, size_text in zip(texts[::2], texts[1::2])
    ]


def main():
    """Download the preview page at ``URL`` and write its file list to CSV.

    The ``<span>`` texts inside the ``ul.tree.list-unstyled`` element are
    assumed to alternate between file name and file size (NOTE(review):
    confirm against the live page markup). The result is written to
    ``OUTPUT_CSV`` indexed by file name.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the file tree is missing from the page or its spans do
            not form filename/size pairs.
    """
    response = requests.get(URL, timeout=REQUEST_TIMEOUT)
    # Fail early instead of scraping an HTTP error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html5lib")
    table = soup.find("ul", attrs={"class": "tree list-unstyled"})
    if table is None:
        raise ValueError(f"no file tree found in the page at {URL}")
    # Echo the raw tree to stdout, as the script has always done.
    print(table)

    texts = [span.text for span in table.find_all("span")]
    # Explicit columns keep set_index() working when no file was found.
    files_df = pd.DataFrame(
        build_file_records(texts), columns=CSV_COLUMNS
    ).set_index("filename")
    files_df.to_csv(OUTPUT_CSV)


if __name__ == "__main__":
    main()