Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Web Crawler Using Python
This is a simple web crawler, written in Python, that crawls a website and prints the most common words found on a page.
It fetches the page, extracts the article text, and counts word frequencies.
Change the URL passed to the `start` function to the site you want to crawl.
57 changes: 57 additions & 0 deletions crawl words.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import requests
from bs4 import BeautifulSoup
import operator
from collections import Counter

def start(url):
    """Crawl *url* and report the 10 most common words in its article body.

    Fetches the page, extracts the text of every ``<div class="entry-content">``,
    lowercases and splits it into words, then hands the combined word list to
    clean_wordlist() for punctuation stripping and counting (which prints the
    top-10 result).

    :param url: address of the page to crawl.
    """
    # timeout prevents the crawler from hanging forever on a stalled server
    source_code = requests.get(url, timeout=10).text
    soup = BeautifulSoup(source_code, 'html.parser')

    # One flat list of words across all matching divs.
    wordlist = []
    for each_text in soup.findAll('div', {'class': 'entry-content'}):
        wordlist.extend(each_text.text.lower().split())

    # Count once, after all divs have been collected (the original's stripped
    # indentation suggested this might run per-div, printing repeatedly).
    clean_wordlist(wordlist)


def clean_wordlist(wordlist):
    """Strip punctuation/symbol characters from each word and forward the
    non-empty results to create_dictionary() for counting.

    Uses str.translate with a deletion table: one C-level pass per word
    instead of ~30 chained str.replace calls.  The raw string also fixes the
    original's invalid ``'\\;'`` escape sequence (a SyntaxWarning on modern
    Python).

    :param wordlist: list of lowercase word strings, possibly containing
        punctuation.
    """
    # Characters to delete from every word; raw string so the backslash is
    # a literal backslash, not an escape.
    symbols = r'!@#$%^&*()_-+={[}]|\;:"<>?/., '
    delete_table = str.maketrans('', '', symbols)

    clean_list = []
    for word in wordlist:
        cleaned = word.translate(delete_table)
        if cleaned:  # drop words that were pure punctuation
            clean_list.append(cleaned)

    create_dictionary(clean_list)

def create_dictionary(clean_list):
    """Count word frequencies in *clean_list* and print the 10 most common
    (word, count) pairs as a list.

    Counter(iterable) counts directly; the original built a plain dict by
    hand and then wrapped it in a Counter, doing the same counting twice.

    :param clean_list: list of cleaned word strings.
    """
    word_count = Counter(clean_list)

    # most_common sorts by count, ties broken by first-insertion order.
    top = word_count.most_common(10)
    print(top)


# Script entry point: change the URL to the site you want to crawl (see
# README).  NOTE(review): github.com's pages likely contain no
# 'entry-content' divs, so this default URL will produce an empty result
# — verify against a target site that uses that class.
if __name__ == '__main__':
    start("https://github.com")