web scraping

linrakesh · Apr 20, 2018 · fa06f03 · fa06f03
1 parent ae73372
commit fa06f03
Show file tree

Hide file tree

Showing 8 changed files with 52 additions and 3 deletions.
diff --git a/map.bat b/map.bat
@@ -0,0 +1 @@
+@ py "C:\Users\acer\Desktop\pythonbox\pythonPrograms\map.py" %*
diff --git a/map.py b/map.py
@@ -0,0 +1,17 @@
+# !python36
+#-------------------------------------------------------------------------------
+# Name:        map.py
+# Purpose:     open a place in Google Maps from the command line
+# Author:      rakesh kumar
+# Created:     03-02-2018
+# Copyright:   binarynote.com
+# Licence:     MIT
+#-------------------------------------------------------------------------------
+
+import sys,webbrowser
+if( len (sys.argv) > 1):
+    query = "+".join(sys.argv[1:])
+    address = "https://www.google.com/maps/place/{}".format(query)
+    webbrowser.open(address)
+else:
+    webbrowser.open('http://www.google.com/maps/place/binarynote.com')
diff --git a/webscraper/first.py b/webscraper/first.py
@@ -0,0 +1,6 @@
+import requests
+res = requests.get('https://automatetheboringstuff.com/files/rj.txt')
+#print(res.text)
+file = open("abcd.txt","w")
+file.write(res.text)
+file.close()
diff --git a/webscraper/imagedownloader.py b/webscraper/imagedownloader.py
@@ -0,0 +1,10 @@
+import urllib.request
+import random
+
+def downloader(image_url):
+    file_name = random.randrange(1,10000)
+    full_file_name = str(file_name) + '.jpg'
+    urllib.request.urlretrieve(image_url,full_file_name)
+
+url ="http://www.binarynote.com/wp-content/themes/binarynote3/images/main.jpg"
+downloader(url)
diff --git a/webscraper/web_scraping.py b/webscraper/web_scraping.py
@@ -0,0 +1,12 @@
+#purpose        : simple example of web scraping
+#author         : rakesh kumar
+#licence        : MIT
+from bs4 import BeautifulSoup
+import requests
+
+url = input("Enter a website to extract the URL's from: ")
+r  = requests.get("http://" +url)
+data = r.text
+soup = BeautifulSoup(data,"html.parser")
+for link in soup.find_all('a'):
+    print(link.get('href'))
diff --git a/website.bat b/website.bat
@@ -0,0 +1 @@
+@ py "C:\Users\acer\Desktop\pythonbox\pythonPrograms\websites.py"
diff --git a/websites.py b/websites.py
@@ -1,5 +1,7 @@
+# !python36
 import webbrowser
 webbrowser.open_new_tab("http://www.binarynote.com")
-webbrowser.open_new_tab("clickbank.com")
-webbrowser.open_new_tab("shareasale.com")
-webbrowser.open_new_tab("themeforest.net")
+webbrowser.open_new_tab("https://www.clickbank.com")
+webbrowser.open_new_tab("https://account.shareasale.com/a-login.cfm?")
+webbrowser.open_new_tab("https://themeforest.net/")
+webbrowser.open_new_tab("https://automatetheboringstuff.com")
diff --git a/~$test.docx b/~$test.docx
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		@ py "C:\Users\acer\Desktop\pythonbox\pythonPrograms\map.py" %*