Skip to content

Commit

Permalink
web scraping
Browse files Browse the repository at this point in the history
  • Loading branch information
rakeshlinux committed Apr 20, 2018
1 parent ae73372 commit fa06f03
Show file tree
Hide file tree
Showing 8 changed files with 52 additions and 3 deletions.
1 change: 1 addition & 0 deletions map.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
:: Run map.py with the "py" launcher, forwarding every command-line argument (%*).
@ py "C:\Users\acer\Desktop\pythonbox\pythonPrograms\map.py" %*
17 changes: 17 additions & 0 deletions map.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# !python36
#-------------------------------------------------------------------------------
# Name: map.py
# Purpose: open Google Maps for a place given on the command line
# Author: rakesh kumar
# Created: 03-02-2018
# Copyright: binarynote.com
# Licence: MIT
#-------------------------------------------------------------------------------

import sys
import webbrowser
from urllib.parse import quote_plus

# Base of the Google Maps "place" URL; the encoded search text is appended to it.
MAPS_PLACE_URL = "https://www.google.com/maps/place/"
# Place shown when the script is started without any arguments.
DEFAULT_PLACE = "binarynote.com"


def build_map_url(words):
    """Return the Google Maps place URL for the given command-line words.

    words: sequence of strings, normally ``sys.argv[1:]``. An empty sequence
        yields the URL of DEFAULT_PLACE.

    The words are joined with spaces and percent-encoded with ``quote_plus``.
    Plain words still come out as ``word1+word2``, while characters such as
    ``&``, ``#``, ``/`` or a space inside one argument can no longer break
    the URL.
    """
    place = " ".join(words) if words else DEFAULT_PLACE
    return MAPS_PLACE_URL + quote_plus(place)


def main():
    """Open the map for the command-line query in the web browser."""
    webbrowser.open(build_map_url(sys.argv[1:]))


# Guarded so that importing this module does not open a browser window.
if __name__ == "__main__":
    main()
6 changes: 6 additions & 0 deletions webscraper/first.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import requests
# Download the sample text and save it in the current directory as abcd.txt.
# The timeout keeps the script from hanging forever on a stalled connection.
res = requests.get('https://automatetheboringstuff.com/files/rj.txt', timeout=30)
# Stop on an HTTP error instead of saving the error page as if it were the text.
res.raise_for_status()
# The context manager closes the file even when the write fails; the explicit
# encoding avoids depending on the platform's default codec.
with open("abcd.txt", "w", encoding="utf-8") as out_file:
    out_file.write(res.text)
10 changes: 10 additions & 0 deletions webscraper/imagedownloader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import urllib.request
import random

def downloader(image_url):
    """Download *image_url* into the current directory and return the file name.

    The file is saved as ``<number>.jpg`` where the number is drawn at random
    from 1..9999. The name is re-drawn while it is already taken, so an
    earlier download is never overwritten.

    image_url: URL understood by ``urllib.request.urlretrieve``.
    Returns: the name of the file that was written.
    Raises: FileExistsError when no unused name is found after 100 draws;
        errors from ``urlretrieve`` propagate unchanged.
    """
    import os  # local import: the module's top-level imports do not include os

    for _ in range(100):
        full_file_name = str(random.randrange(1, 10000)) + '.jpg'
        if not os.path.exists(full_file_name):
            break
    else:
        raise FileExistsError("no unused <number>.jpg file name found")
    urllib.request.urlretrieve(image_url, full_file_name)
    return full_file_name

url ="http://www.binarynote.com/wp-content/themes/binarynote3/images/main.jpg"

# Download only when run as a script, so that importing this module for its
# downloader() function does not trigger a network request.
if __name__ == "__main__":
    downloader(url)
12 changes: 12 additions & 0 deletions webscraper/web_scraping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#purpose : simple example of web scraping
#author : rakesh kumar
#licence : MIT
from bs4 import BeautifulSoup
import requests

def normalize_url(raw):
    """Return *raw* as an absolute URL.

    Surrounding whitespace is removed. ``http://`` is prepended only when the
    text does not already start with ``http://`` or ``https://``, so a full
    address typed by the user is left intact.
    """
    address = raw.strip()
    if address.lower().startswith(("http://", "https://")):
        return address
    return "http://" + address


def main():
    """Ask for a website and print the target of every link on its page."""
    url = input("Enter a website to extract the URL's from: ")
    r = requests.get(normalize_url(url), timeout=30)
    # Fail loudly on an HTTP error rather than listing links of an error page.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")
    # href=True skips anchors without an href, which would otherwise print "None".
    for link in soup.find_all('a', href=True):
        print(link['href'])


# Guarded so that importing this module does not prompt for input.
if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions website.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
:: Run websites.py with the "py" launcher; no arguments are forwarded.
@ py "C:\Users\acer\Desktop\pythonbox\pythonPrograms\websites.py"
8 changes: 5 additions & 3 deletions websites.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# !python36
import webbrowser
# Sites to open, in order. Every entry carries an explicit scheme: a bare host
# name such as "clickbank.com" is not a URL and may not be treated as a web
# address by the browser launcher. The scheme-less duplicates were dropped in
# favour of the full URLs.
SITES = (
    "http://www.binarynote.com",
    "https://www.clickbank.com",
    "https://account.shareasale.com/a-login.cfm?",
    "https://themeforest.net/",
    "https://automatetheboringstuff.com",
)


def main():
    """Open each entry of SITES in a new browser tab."""
    for site in SITES:
        webbrowser.open_new_tab(site)


# Guarded so that importing this module does not open any browser tabs.
if __name__ == "__main__":
    main()
Binary file removed ~$test.docx
Binary file not shown.

0 comments on commit fa06f03

Please sign in to comment.