From fa06f032f042c3f4f3bdd2b69a0fbb6980fae46c Mon Sep 17 00:00:00 2001
From: rakesh kumar
Date: Fri, 20 Apr 2018 11:35:04 +0530
Subject: [PATCH] web scrapping

---
 map.bat                       |   1 +
 map.py                        |  17 +++++++++++++++++
 webscraper/first.py           |   6 ++++++
 webscraper/imagedownloader.py |  10 ++++++++++
 webscraper/web_scraping.py    |  12 ++++++++++++
 website.bat                   |   1 +
 websites.py                   |   8 +++++---
 ~$test.docx                   | Bin 162 -> 0 bytes
 8 files changed, 52 insertions(+), 3 deletions(-)
 create mode 100644 map.bat
 create mode 100644 map.py
 create mode 100644 webscraper/first.py
 create mode 100644 webscraper/imagedownloader.py
 create mode 100644 webscraper/web_scraping.py
 create mode 100644 website.bat
 delete mode 100644 ~$test.docx

diff --git a/map.bat b/map.bat
new file mode 100644
index 0000000..2180be8
--- /dev/null
+++ b/map.bat
@@ -0,0 +1 @@
+@ py "C:\Users\acer\Desktop\pythonbox\pythonPrograms\map.py" %*
\ No newline at end of file
diff --git a/map.py b/map.py
new file mode 100644
index 0000000..285beda
--- /dev/null
+++ b/map.py
@@ -0,0 +1,17 @@
+#! python3
+#-------------------------------------------------------------------------------
+# Name: map.py
+# Purpose: open a place in Google Maps from the command line
+# Author: rakesh kumar
+# Created: 03-02-2018
+# Copyright: binarynote.com
+# Licence: MIT
+#-------------------------------------------------------------------------------
+
+import sys
+import webbrowser
+from urllib.parse import quote_plus
+
+# URL-encode the search words; with no arguments fall back to the default place.
+place = quote_plus(" ".join(sys.argv[1:])) or "binarynote.com"
+webbrowser.open("https://www.google.com/maps/place/{}".format(place))
\ No newline at end of file
diff --git a/webscraper/first.py b/webscraper/first.py
new file mode 100644
index 0000000..817f08f
--- /dev/null
+++ b/webscraper/first.py
@@ -0,0 +1,6 @@
+import requests
+
+res = requests.get('https://automatetheboringstuff.com/files/rj.txt', timeout=30)
+res.raise_for_status()  # fail loudly instead of saving an HTTP error page
+with open("abcd.txt", "w", encoding="utf-8") as out_file:
+    out_file.write(res.text)
\ No newline at end of file
diff --git a/webscraper/imagedownloader.py b/webscraper/imagedownloader.py
new file mode 100644
index 0000000..4c18df6
--- /dev/null
+++ b/webscraper/imagedownloader.py
@@ -0,0 +1,10 @@
+import urllib.request
+import random
+
+def downloader(image_url):
+    """Save image_url under a random '<1-9999>.jpg' name (relative path) and return that name."""
+    full_file_name = str(random.randrange(1, 10000)) + '.jpg'
+    return urllib.request.urlretrieve(image_url, full_file_name)[0]
+
+url ="http://www.binarynote.com/wp-content/themes/binarynote3/images/main.jpg"
+downloader(url)
\ No newline at end of file
diff --git a/webscraper/web_scraping.py b/webscraper/web_scraping.py
new file mode 100644
index 0000000..35566fc
--- /dev/null
+++ b/webscraper/web_scraping.py
@@ -0,0 +1,12 @@
+#purpose : simple example of web scraping
+#author : rakesh kumar
+#licence : MIT
+from bs4 import BeautifulSoup
+import requests
+
+url = input("Enter a website to extract the URL's from: ").strip()
+# Prepend a scheme only when none was typed; "http://https://..." is not a valid URL.
+r = requests.get(url if "://" in url else "http://" + url, timeout=30)
+r.raise_for_status()
+for link in BeautifulSoup(r.text, "html.parser").find_all('a'):
+    print(link.get('href'))
\ No newline at end of file
diff --git a/website.bat b/website.bat
new file mode 100644
index 0000000..ea8d152
--- /dev/null
+++ b/website.bat
@@ -0,0 +1 @@
+@ py "C:\Users\acer\Desktop\pythonbox\pythonPrograms\websites.py"
\ No newline at end of file
diff --git a/websites.py b/websites.py
index 32ecd2e..c9086c7 100644
--- a/websites.py
+++ b/websites.py
@@ -1,5 +1,7 @@
+#! python3
 import webbrowser
 webbrowser.open_new_tab("http://www.binarynote.com")
-webbrowser.open_new_tab("clickbank.com")
-webbrowser.open_new_tab("shareasale.com")
-webbrowser.open_new_tab("themeforest.net")
\ No newline at end of file
+webbrowser.open_new_tab("https://www.clickbank.com")
+webbrowser.open_new_tab("https://account.shareasale.com/a-login.cfm?")
+webbrowser.open_new_tab("https://themeforest.net/")
+webbrowser.open_new_tab("https://automatetheboringstuff.com")
\ No newline at end of file
diff --git a/~$test.docx b/~$test.docx
deleted file mode 100644
index 
0d977a0185ef65442a09002e555a5467303d02e7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 162 zcmd<|%}g%JFV0UZVITo;GWarNG9)t;G2}B8192JyL_02VZU$$ds#2i(Oomh-slea| Kq{|tI5eEPXvK6NQ