From 1e65d58aa95d9131dda2144740fdd63e773318a2 Mon Sep 17 00:00:00 2001
From: bvpranu97 <49627284+bvpranu97@users.noreply.github.com>
Date: Fri, 2 Oct 2020 10:15:31 +0530
Subject: [PATCH 1/2] Add files via upload

---
 crawl words.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 crawl words.py

diff --git a/crawl words.py b/crawl words.py
new file mode 100644
index 0000000..834e7c6
--- /dev/null
+++ b/crawl words.py
@@ -0,0 +1,57 @@
+import requests
+from bs4 import BeautifulSoup
+import operator
+from collections import Counter
+
+def start(url):
+
+
+    wordlist = []
+    source_code = requests.get(url).text
+
+
+    soup = BeautifulSoup(source_code, 'html.parser')
+
+
+    for each_text in soup.findAll('div', {'class':'entry-content'}):
+        content = each_text.text
+
+        words = content.lower().split()
+
+        for each_word in words:
+            wordlist.append(each_word)
+    clean_wordlist(wordlist)
+
+
+def clean_wordlist(wordlist):
+
+    clean_list =[]
+    for word in wordlist:
+        symbols = '!@#$%^&*()_-+={[}]|\;:"<>?/., '
+
+        for i in range (0, len(symbols)):
+            word = word.replace(symbols[i], '')
+
+        if len(word) > 0:
+            clean_list.append(word)
+    create_dictionary(clean_list)
+
+def create_dictionary(clean_list):
+    word_count = {}
+
+    for word in clean_list:
+        if word in word_count:
+            word_count[word] += 1
+        else:
+            word_count[word] = 1
+
+
+    c = Counter(word_count)
+
+    # returns the most occurring elements
+    top = c.most_common(10)
+    print(top)
+
+
+if __name__ == '__main__':
+    start("https://github.com")

From 85fbd305b4621de916e4c3dbec9c20b5f62673be Mon Sep 17 00:00:00 2001
From: bvpranu97 <49627284+bvpranu97@users.noreply.github.com>
Date: Fri, 2 Oct 2020 10:16:27 +0530
Subject: [PATCH 2/2] Update README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 317e479..0fd47c6 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,4 @@
 # Web Crawler Using Python
 This is a simple webcrawler to crawl a website. It uses python programming language to perform crawling.
 
+Change the url to your needs as start function parameter.
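
A minimal usage sketch for the crawler added in PATCH 1/2 (the URL below is a placeholder, not taken from the patch): start() fetches the page, keeps only the text inside <div class="entry-content"> blocks, strips punctuation in clean_wordlist(), and prints the ten most frequent words, so pointing the script at another site usually means changing both the URL at the bottom of crawl words.py and the hard-coded 'entry-content' selector.

```python
# Hypothetical edit to the bottom of "crawl words.py" -- the URL is a placeholder.
# Only an empty list is printed unless the target page actually contains
# <div class="entry-content"> elements, since that selector is hard-coded in start().
if __name__ == '__main__':
    start("https://example.com/some-article")
```

As a side note on the counting step, collections.Counter can consume the cleaned word list directly (Counter(clean_list).most_common(10)), so the manual word_count dictionary built in create_dictionary() is not strictly required.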