diff --git a/MetaDataScraper/FacebookScraper.py b/MetaDataScraper/FacebookScraper.py
index 7b1c26c..6d917e6 100644
--- a/MetaDataScraper/FacebookScraper.py
+++ b/MetaDataScraper/FacebookScraper.py
@@ -4,11 +4,13 @@ from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.common.keys import Keys
 from webdriver_manager.chrome import ChromeDriverManager
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
 import time
 import logging
 
 logging.getLogger().setLevel(logging.CRITICAL)
 
-class FacebookScraper:
+class LoginlessScraper:
     """
     A class to scrape information from a public Facebook page.
     It does not require any authentication or API keys.
@@ -58,8 +60,8 @@ class FacebookScraper:
     To scrape a Facebook page:
 
     ```python
-    from MetaDataScraper import FacebookScraper
-    scraper = FacebookScraper("page_id")
+    from MetaDataScraper import LoginlessScraper
+    scraper = LoginlessScraper("page_id")
     data = scraper.scrape()
 
     print(f"Followers: {data['followers']}")
@@ -81,8 +83,8 @@ def __init__(self, page_id: str):
 
         Example
        -------
-        To initialize a FacebookScraper object:
-            scraper = FacebookScraper("page_id")
+        To initialize a LoginlessScraper object:
+            scraper = LoginlessScraper("page_id")
         """
         self.page_id = page_id
         self.driver = None
@@ -138,7 +140,7 @@ def __scroll_to_top(self):
 
     def __get_xpath_constructor(self):
         """Constructs the XPath for locating posts on the Facebook page."""
-        xpath_return_script = r"""
+        _xpath_return_script = r"""
         var iterator = document.evaluate('.//*[starts-with(@aria-label, "Like")]', document);
         var firstelement = iterator.iterateNext()
         var firstpost = firstelement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement
@@ -176,80 +178,80 @@ def __get_xpath_constructor(self):
         }
         return xpath_first
         """
-        xpath_constructor = self.driver.execute_script(xpath_return_script)
-        split_xpath = xpath_constructor.split('[')
-        split_index = split_xpath.index('1]/div/div/div/div/div/div/div/div/div/div/div')
+        _xpath_constructor = self.driver.execute_script(_xpath_return_script)
+        _split_xpath = _xpath_constructor.split('[')
+        _split_index = _split_xpath.index('1]/div/div/div/div/div/div/div/div/div/div/div')
 
-        self.xpath_first = '['.join(split_xpath[:split_index])+'['
-        self.xpath_last = '['+'['.join(split_xpath[split_index+1:])
-        self.xpath_identifier_addum = ']/div/div/div/div/div/div/div/div/div/div/div'
+        self._xpath_first = '['.join(_split_xpath[:_split_index])+'['
+        self._xpath_last = '['+'['.join(_split_xpath[_split_index+1:])
+        self._xpath_identifier_addum = ']/div/div/div/div/div/div/div/div/div/div/div'
 
     def __extract_post_details(self):
         """Extracts details of posts including text, likes, shares, and video links."""
-        c = 1
-        error_count = 0
+        _c = 1
+        _error_count = 0
         while True:
-            xpath = self.xpath_first+str(c)+self.xpath_identifier_addum+self.xpath_last
-            if not self.driver.find_elements(By.XPATH, xpath):
-                error_count += 1
-                if error_count < 3:
-                    print('Error extracting post', c, '\b. Retrying extraction...', end='\r')
+            _xpath = self._xpath_first+str(_c)+self._xpath_identifier_addum+self._xpath_last
+            if not self.driver.find_elements(By.XPATH, _xpath):
+                _error_count += 1
+                if _error_count < 3:
+                    print('Error extracting post', _c, '\b. Retrying extraction...', end='\r')
                     time.sleep(5)
                     self.driver.execute_script("window.scrollBy(0, +20);")
                     continue
                 break
-            error_count = 0
+            _error_count = 0
             # Scroll until the post is visible
-            self.driver.execute_script("arguments[0].scrollIntoView();", self.driver.find_elements(By.XPATH, xpath)[0])
-            if not self.driver.find_elements(By.XPATH, xpath):
-                error_count += 1
-                if error_count < 3:
-                    print('Error extracting post', c, '\b. Retrying extraction...', end='\r')
+            self.driver.execute_script("arguments[0].scrollIntoView();", self.driver.find_elements(By.XPATH, _xpath)[0])
+            if not self.driver.find_elements(By.XPATH, _xpath):
+                _error_count += 1
+                if _error_count < 3:
+                    print('Error extracting post', _c, '\b. Retrying extraction...', end='\r')
                     time.sleep(5)
                     self.driver.execute_script("window.scrollBy(0, +20);")
                     continue
                 break
-            error_count = 0
+            _error_count = 0
             print(" "*100, end='\r')
-            print('Extracting post', c, end='\r')
-            post_components = self.driver.find_element(By.XPATH, xpath).find_elements(By.XPATH, './*')
-            if len(post_components) > 2:
-                post_text = '\n'.join(post_components[2].text.split('\n'))
-                if post_components[3].text.split('\n')[0]=='All reactions:':
-                    post_like = post_components[3].text.split('\n')[1]
-                    if len(post_components[3].text.split('\n'))>4:
-                        post_share = post_components[3].text.split('\n')[3].split(' ')[0]
-                elif len(post_components)>4 and post_components[4].text.split('\n')[0]=='All reactions:':
-                    post_like = post_components[4].text.split('\n')[1]
-                    post_share = post_components[4].text.split('\n')[4].split(' ')[0]
+            print('Extracting post', _c, end='\r')
+            _post_components = self.driver.find_element(By.XPATH, _xpath).find_elements(By.XPATH, './*')
+            if len(_post_components) > 2:
+                _post_text = '\n'.join(_post_components[2].text.split('\n'))
+                if _post_components[3].text.split('\n')[0]=='All reactions:':
+                    _post_like = _post_components[3].text.split('\n')[1]
+                    if len(_post_components[3].text.split('\n'))>4:
+                        _post_share = _post_components[3].text.split('\n')[3].split(' ')[0]
+                elif len(_post_components)>4 and _post_components[4].text.split('\n')[0]=='All reactions:':
+                    _post_like = _post_components[4].text.split('\n')[1]
+                    _post_share = _post_components[4].text.split('\n')[4].split(' ')[0]
                 else:
-                    post_like = 0
-                    post_share = 0
-                self.post_texts.append(post_text)
-                self.post_likes.append(post_like)
-                self.post_shares.append(post_share)
+                    _post_like = 0
+                    _post_share = 0
+                self.post_texts.append(_post_text)
+                self.post_likes.append(_post_like)
+                self.post_shares.append(_post_share)
             else:
                 try:
-                    post_share = post_components[1].find_element(By.XPATH, './/*[@aria-label="Share"]').text
+                    _post_share = _post_components[1].find_element(By.XPATH, './/*[@aria-label="Share"]').text
                 except:
-                    c+=1
+                    _c+=1
                     continue
-                post_like = post_components[1].find_element(By.XPATH, './/*[@aria-label="Like"]').text
-                post_share = post_components[1].find_element(By.XPATH, './/*[@aria-label="Share"]').text
+                _post_like = _post_components[1].find_element(By.XPATH, './/*[@aria-label="Like"]').text
+                _post_share = _post_components[1].find_element(By.XPATH, './/*[@aria-label="Share"]').text
                 time.sleep(1)
                 self.post_texts.append('')
-                self.post_likes.append(post_like)
-                self.post_shares.append(post_share)
-            if len(self.driver.find_elements(By.XPATH, xpath)[0].find_elements(By.TAG_NAME, 'video')) > 0:
-                if 'reels' in self.driver.find_elements(By.XPATH, xpath)[0].find_elements(By.TAG_NAME, 'a')[0].get_attribute('href'):
-                    self.video_links.append('https://www.facebook.com'+self.driver.find_elements(By.XPATH, xpath)[0].find_elements(By.TAG_NAME, 'a')[0].get_attribute('href'))
+                self.post_likes.append(_post_like)
+                self.post_shares.append(_post_share)
+            if len(self.driver.find_elements(By.XPATH, _xpath)[0].find_elements(By.TAG_NAME, 'video')) > 0:
+                if 'reels' in self.driver.find_elements(By.XPATH, _xpath)[0].find_elements(By.TAG_NAME, 'a')[0].get_attribute('href'):
+                    self.video_links.append('https://www.facebook.com'+self.driver.find_elements(By.XPATH, _xpath)[0].find_elements(By.TAG_NAME, 'a')[0].get_attribute('href'))
                 else:
-                    self.video_links.append(self.driver.find_elements(By.XPATH, xpath)[0].find_elements(By.TAG_NAME, 'a')[4].get_attribute('href'))
+                    self.video_links.append(self.driver.find_elements(By.XPATH, _xpath)[0].find_elements(By.TAG_NAME, 'a')[4].get_attribute('href'))
                 self.is_video.append(True)
             else:
                 self.is_video.append(False)
                 self.video_links.append('')
-            c += 1
+            _c += 1
 
         self.post_likes = [int(i) if str(i).isdigit() else 0 for i in self.post_likes]
         self.post_shares = [int(i) if str(i).isdigit() else 0 for i in self.post_shares]
@@ -281,7 +283,7 @@ def scrape(self) -> dict:
         Example
         -------
         To scrape a Facebook page:
-            scraper = FacebookScraper("page_id")
+            scraper = LoginlessScraper("page_id")
 
             data = scraper.scrape()
 
@@ -311,4 +313,297 @@ def scrape(self) -> dict:
             }
         finally:
             if self.driver:
-                self.driver.quit()
\ No newline at end of file
+                self.driver.quit()
+
+class LoggedInScraper:
+    """
+    A class to scrape information from a public Facebook page. Handles the drawback of the LoginlessScraper: some pages
+    aren't accessible without authentication. It logs into a Facebook account in order to scrape such pages.
+
+    Attributes
+    ----------
+
+    `page_id` : str
+        The Facebook page ID to scrape information from.
+
+    `email` : str
+        The email address associated with the Facebook account.
+
+    `password` : str
+        The password of the Facebook account.
+
+    `driver` : webdriver.Chrome
+        The Selenium WebDriver instance.
+
+    `followers` : str
+        The followers count of the Facebook page.
+
+    `post_texts` : list
+        The list of texts from the posts.
+
+    `post_likes` : list
+        The list of likes count for the posts.
+
+    `post_shares` : list
+        The list of shares count for the posts.
+
+    `is_video` : list
+        The list indicating whether the post contains a video.
+
+    `video_links` : list
+        The list of video links if the post contains a video.
+
+    Methods
+    -------
+    `scrape`(self) -> dict:
+        Initiates the scraping process and returns a dictionary with the scraped data.
+
+    Returns
+    -------
+    [dict]
+        A dictionary containing the following:
+
+        `followers` (str):
+            The followers count of the Facebook page.
+
+        `post_texts` (list):
+            A list of texts from the posts.
+
+        `post_likes` (list):
+            A list of likes count for the posts.
+
+        `post_shares` (list):
+            A list of shares count for the posts.
+
+        `is_video` (list):
+            A list indicating whether the post contains a video.
+
+        `video_links` (list):
+            A list of video links if the post contains a video.
+
+    Example
+    -------
+    To scrape a Facebook page:
+
+    ```python
+    from MetaDataScraper import LoggedInScraper
+    scraper = LoggedInScraper("page_id", "email", "password")
+    data = scraper.scrape()
+
+    print(f"Followers: {data['followers']}")
+    print(f"Post Texts: {data['post_texts']}")
+    print(f"Post Likes: {data['post_likes']}")
+    print(f"Post Shares: {data['post_shares']}")
+    print(f"Is Video: {data['is_video']}")
+    print(f"Video Links: {data['video_links']}")
+    ```
+    """
+
+    def __init__(self, page_id: str, email: str, password: str):
+        """
+        Constructs all the necessary attributes for the LoggedInScraper object.
+
+        Parameters
+        ----------
+        page_id : str
+            The Facebook page ID to scrape information from.
+        email : str
+            The email address used for Facebook login.
+        password : str
+            The password used for Facebook login.
+
+        Example
+        -------
+        To initialize a LoggedInScraper object:
+            scraper = LoggedInScraper("page_id", "email@example.com", "password")
+        """
+        self.page_id = page_id
+        self.email = email
+        self.password = password
+        self.driver = None
+        self.followers = None
+        self.post_texts = []
+        self.post_likes = []
+        self.post_shares = []
+        self.is_video = []
+        self.video_links = []
+
+    def __setup_driver(self):
+        """Sets up the Selenium WebDriver with necessary options."""
+        service = Service(ChromeDriverManager().install())
+        options = webdriver.ChromeOptions()
+        options.add_argument("--headless=new")
+        options.add_argument("--log-level=3")
+        options.add_argument("--disable-notifications")
+        options.add_argument("--disable-extensions")
+        options.add_argument("--disable-popup-blocking")
+        self.driver = webdriver.Chrome(service=service, options=options)
+
+    def __login(self):
+        """Logs into Facebook using the provided credentials."""
+        logged_in = False
+        while not logged_in:
+            if self.driver.find_elements(By.ID, 'not_me_link'):
+                self.driver.find_element(By.ID, 'not_me_link').click()
+            self.driver.get('https://www.facebook.com/login')
+            self.driver.find_element(By.NAME, 'email').clear()
+            self.driver.find_element(By.NAME, 'email').send_keys(self.email)
+            self.driver.find_element(By.NAME, 'pass').clear()
+            self.driver.find_element(By.NAME, 'pass').send_keys(self.password)
+            self.driver.find_element(By.ID, 'loginbutton').click()
+            # Wait until the login process is completed
+            WebDriverWait(self.driver, 10).until(EC.url_changes('https://www.facebook.com/login'))
+            if self.driver.current_url != 'https://www.facebook.com/?sk=welcome':
+                print("Invalid credentials. Please try again.", end='\r')
+            else:
+                print(" "*100, end='\r')
+                logged_in = True
+
+    def __navigate_to_page(self):
+        """Navigates to the specified Facebook page."""
+        url = f"https://www.facebook.com/{self.page_id}"
+        self.driver.get(url)
+
+    def __check_page_accessibility(self):
+        """
+        Checks if the page is accessible.
+        If not, it quits the driver and raises an exception.
+        """
+        if "This content isn't available at the moment" in self.driver.find_element(By.TAG_NAME, "body").get_attribute("innerText"):
+            self.driver.quit()
+            raise Exception("Page is inaccessible even after login. Check the page ID.")
+
+    def __extract_followers_count(self):
+        """Extracts the followers count from the Facebook page."""
+        for i in range(25):
+            followers = self.driver.execute_script(f"return document.getElementsByTagName('a')[{i}].innerText")
+            if followers and 'followers' in followers:
+                self.followers = followers
+                break
+            time.sleep(2)
+
+    def __scroll_to_top(self):
+        """Scrolls to the top of the page to ensure proper loading of elements."""
+        self.driver.execute_script("window.scrollTo(0, 0);")
+        time.sleep(2)
+
+    def __get_xpath_constructor(self):
+        """Constructs the XPath for locating posts on the Facebook page."""
+        xpath_return_script = r"""
+        var iterator = document.evaluate('.//*[@aria-label="Like"]', document);
+        var firstelement = iterator.iterateNext()
+        var firstpost = firstelement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement
+
+        function getXPath(element) {
+            let selector = '';
+            let foundRoot;
+            let currentElement = element;
+            do {
+                const tagName = currentElement.tagName.toLowerCase();
+                const parentElement = currentElement.parentElement;
+                if (parentElement.childElementCount > 1) {
+                    const parentsChildren = [...parentElement.children];
+                    let tag = [];
+                    parentsChildren.forEach(child => {if (child.tagName.toLowerCase() === tagName) tag.push(child)})
+                    if (tag.length === 1) selector = `/${tagName}${selector}`;
+                    else {
+                        const position = tag.indexOf(currentElement) + 1;
+                        selector = `/${tagName}[${position}]${selector}`;
+                    }
+                }
+                else selector = `/${tagName}${selector}`;
+                currentElement = parentElement;
+                foundRoot = parentElement.tagName.toLowerCase() === 'html';
+                if(foundRoot) selector = `/html${selector}`;
+            }
+            while (foundRoot === false);
+            return selector;
+        }
+        xpath_first = getXPath(firstpost)
+        if(!xpath_first.includes('1]/div/div/div/div/div/div/div/div/div/div/div')) {
+            firstelement = iterator.iterateNext();
+            firstpost = firstelement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement;
+            xpath_first = getXPath(firstpost);
+        }
+        return xpath_first
+        """
+        xpath_constructor = self.driver.execute_script(xpath_return_script)
+        split_xpath = xpath_constructor.split('[')
+        split_index = split_xpath.index('1]/div/div/div/div/div/div/div/div/div/div/div')
+        self.xpath_first = '['.join(split_xpath[:split_index])+'['
+        self.xpath_last = '['+'['.join(split_xpath[split_index+1:])
+        self.xpath_identifier_addum = ']/div/div/div/div/div/div/div/div/div/div/div'
+        if len(self.driver.find_element(By.XPATH, xpath_constructor).find_elements(By.TAG_NAME, 'video')):
+            self.xpath_last = '/'.join(self.xpath_last.split('/')[:3])
+
+    def __extract_post_details(self):
+        """Extracts details of posts including text, likes, shares, and video links."""
+        c = 1
+        error_count = 0
+        while True:
+            xpath = self.xpath_first + str(c) + self.xpath_identifier_addum + self.xpath_last
+            if not self.driver.find_elements(By.XPATH, xpath):
+                error_count += 1
+                if error_count < 3:
+                    print('Error extracting post', c, '\b. Count', error_count, '\b. Retrying extraction...', end='\r')
+                    time.sleep(5)
+                    self.driver.execute_script("window.scrollBy(0, +40);")
+                    continue
+                break
+            error_count = 0
+            print(" "*100, end='\r')
+            print("Extracting data of post", c, end='\r')
+            self.driver.execute_script("arguments[0].scrollIntoView();", self.driver.find_elements(By.XPATH, xpath)[0])
+            post_components = self.driver.find_element(By.XPATH, xpath).find_elements(By.XPATH, './*')
+            if len(post_components) > 2:
+                post_text = '\n'.join(post_components[2].text.split('\n'))
+                if post_components[3].text.split('\n')[0] == 'All reactions:':
+                    post_likes = post_components[3].text.split('\n')[1]
+                    # Default to 0 when no share count is present, so post_shares is never unbound
+                    post_shares = post_components[3].text.split('\n')[4].split(' ')[0] if len(post_components[3].text.split('\n')) > 4 else 0
+                elif len(post_components) > 4 and post_components[4].text.split('\n')[0] == 'All reactions:':
+                    post_likes = post_components[4].text.split('\n')[1]
+                    # Same guard as above
+                    post_shares = post_components[4].text.split('\n')[4].split(' ')[0] if len(post_components[4].text.split('\n')) > 4 else 0
+                else:
+                    post_likes = 0
+                    post_shares = 0
+                self.post_texts.append(post_text)
+                self.post_likes.append(post_likes if post_likes else 0)
+                self.post_shares.append(post_shares if post_shares else 0)
+            elif len(post_components) == 2:
+                try:
+                    post_shares = post_components[1].find_element(By.XPATH, './/*[@aria-label="Share"]').text
+                except:
+                    print("Some error occurred while extracting post", c, ". Skipping post...", end='\r')
+                    c += 1
+                    continue
+                post_likes = post_components[1].find_element(By.XPATH, './/*[@aria-label="Like"]').text
+                post_shares = post_components[1].find_element(By.XPATH, './/*[@aria-label="Share"]').text
+                self.post_texts.append('')
+                self.post_likes.append(post_likes if post_likes else 0)
+                self.post_shares.append(post_shares if post_shares else 0)
+            elif len(post_components) == 1:
+                post_text = post_components[0].text.split('\n')[0]
+                post_likes = post_components[0].find_element(By.XPATH, './/*[@aria-label="Like"]').text
+                post_shares = post_components[0].find_element(By.XPATH, './/*[@aria-label="Share"]').text
+                self.post_texts.append(post_text)
+                self.post_likes.append(post_likes if post_likes else 0)
+                self.post_shares.append(post_shares if post_shares else 0)
+            if len(self.driver.find_elements(By.XPATH, xpath)[0].find_elements(By.TAG_NAME, 'video')) > 0:
+                if 'reel' in self.driver.find_elements(By.XPATH, xpath)[0].find_elements(By.TAG_NAME, 'a')[0].get_attribute('href'):
+                    self.video_links.append('https://www.facebook.com' + self.driver.find_elements(By.XPATH, xpath)[0].find_elements(By.TAG_NAME, 'a')[0].get_attribute('href'))
+                else:
+                    self.video_links.append(self.driver.find_elements(By.XPATH, xpath)[0].find_elements(By.TAG_NAME, 'a')[4].get_attribute('href'))
+                self.is_video.append(True)
+            else:
+                self.is_video.append(False)
+                self.video_links.append('')
+            c += 1
+
+        self.post_likes = [int(i) if str(i).isdigit() else 0 for i in self.post_likes]
+        self.post_shares = [int(i) if str(i).isdigit() else 0 for i in self.post_shares]
+
+    def scrape(self):
+        """Initiates the scraping process and returns a dictionary with the scraped data."""
+        self.__setup_driver()
+        self.__login()
+        self.__navigate_to_page()
+        self.__check_page_accessibility()
+        self.__extract_followers_count()
+        self.__scroll_to_top()
+        self.__get_xpath_constructor()
+        self.__extract_post_details()
+        self.driver.quit()
+        print("\033[A\033[A\033[A") # DevTools line deleter
+        return {
+            'followers': self.followers,
+            'post_texts': self.post_texts,
+            'post_likes': self.post_likes,
+            'post_shares': self.post_shares,
+            'is_video': self.is_video,
+            'video_links': self.video_links
+        }
\ No newline at end of file
diff --git a/MetaDataScraper/__init__.py b/MetaDataScraper/__init__.py
index a4ff188..1516b0c 100644
--- a/MetaDataScraper/__init__.py
+++ b/MetaDataScraper/__init__.py
@@ -2,17 +2,23 @@
 MetaDataScraper Module
 ----------------------
 
-This module provides a script `MetaDataScraper` to scrape information from a public Facebook page.
+This module provides `MetaDataScraper`, a package to scrape information from a public Facebook page: the follower count, plus the details and interactions of each post.
+It exposes two classes: `LoginlessScraper` and `LoggedInScraper`. `LoginlessScraper` needs no authentication or API keys, but cannot access some Facebook pages.
+`LoggedInScraper` overcomes this drawback by logging in with the credentials of a Facebook account before scraping.
 
 Overview:
 ---------
-The `MetaDataScraper` module is designed to automate the extraction of follower counts and post details
-from a public Facebook page. It uses Selenium WebDriver for web automation and scraping.
+The `MetaDataScraper` module automates the extraction of follower counts and post details from a public Facebook page, using Selenium WebDriver for web automation and scraping.
+Use `LoginlessScraper` when the target page is accessible without logging in; no credentials are required.
+Use `LoggedInScraper` for pages that Facebook hides from logged-out visitors; it logs in with the supplied account credentials before scraping.
 
 Classes:
 --------
-+ FacebookScraper
+1) `LoginlessScraper`: Scrapes the followers count and post details from a public Facebook page. Requires no authentication or API keys.
+2) `LoggedInScraper`:
+    Scrapes the followers count and post details from a public Facebook page, including pages that the LoginlessScraper cannot access.
+    Requires the credentials of a Facebook account to log in and scrape the data.
 
 Methods:
 ------------------------
@@ -26,10 +32,11 @@
 Usage:
 ------
-
+#### 1) LoginlessScraper:
     ```python
-    from MetaDataScraper import FacebookScraper
-    scraper = FacebookScraper("page_id")
+    from MetaDataScraper import LoginlessScraper
+    page_id = "your_target_page_id"
+    scraper = LoginlessScraper(page_id)
     data = scraper.scrape()
 
     print(f"Followers: {data['followers']}")
@@ -38,8 +45,25 @@
     print(f"Post Shares: {data['post_shares']}")
     print(f"Is Video: {data['is_video']}")
     print(f"Video Links: {data['video_links']}")
+    ```
+#### 2) LoggedInScraper:
+    ```python
+    from MetaDataScraper import LoggedInScraper
+    page_id = "your_target_page_id"
+    email = "your_facebook_email"
+    password = "your_facebook_password"
+    scraper = LoggedInScraper(page_id, email, password)
+    data = scraper.scrape()
+
+    print(f"Followers: {data['followers']}")
+    print(f"Post Texts: {data['post_texts']}")
+    print(f"Post Likes: {data['post_likes']}")
+    print(f"Post Shares: {data['post_shares']}")
+    print(f"Is Video: {data['is_video']}")
+    print(f"Video Links: {data['video_links']}")
+    ```
 """
 
-from .FacebookScraper import FacebookScraper
+from .FacebookScraper import LoggedInScraper, LoginlessScraper
 
-__all__ = ["FacebookScraper"]
\ No newline at end of file
+__all__ = ["LoggedInScraper", "LoginlessScraper"]
\ No newline at end of file
diff --git a/README.md b/README.md
index 5b19561..de2cf48 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,8 @@
 # MetaDataScraper
 
-MetaDataScraper is a Python package designed to automate the extraction of information like follower counts, and post details & interactions from a public Facebook page, in the form of a FacebookScraper object. It uses Selenium WebDriver for web automation and scraping.
+MetaDataScraper is a Python package designed to automate the extraction of information like follower counts and post details & interactions from a public Facebook page, returned as a dictionary of results. It uses Selenium WebDriver for web automation and scraping.
+The package provides two classes: `LoginlessScraper` and `LoggedInScraper`. The `LoginlessScraper` class requires no authentication or API keys, but cannot access some Facebook pages.
+The `LoggedInScraper` class overcomes this drawback by logging in with the credentials of a Facebook account before scraping.
 
 ## Installation
 
@@ -16,16 +18,22 @@ Make sure you have Python 3.x and pip installed.
 
 To use MetaDataScraper, follow these steps:
 
-1. Import the FacebookScraper class:
+1. Import the `LoginlessScraper` or the `LoggedInScraper` class:
 
     ```python
-    from MetaDataScraper import FacebookScraper
+    from MetaDataScraper import LoginlessScraper, LoggedInScraper
    ```
 
 2. Initialize the scraper with the Facebook page ID:
 
    ```python
-    scraper = FacebookScraper("page_id")
+    page_id = "your_target_page_id"
+    # Option 1: loginless
+    scraper = LoginlessScraper(page_id)
+    # Option 2: logged in, for pages that require authentication
+    email = "your_facebook_email"
+    password = "your_facebook_password"
+    scraper = LoggedInScraper(page_id, email, password)
    ```
 
 3. Scrape the Facebook page to retrieve information:
@@ -47,9 +53,14 @@ To use MetaDataScraper, follow these steps:
 
 ## Features
 
-- **Scraping:** Extracts followers count and post details (text, likes, shares, video links) from Facebook pages.
-- **Flexibility:** Handles various post structures and video formats on Facebook pages.
-- **Headless Mode:** Runs in headless mode for silent scraping without UI interference.
+- **Automated Extraction**: Automatically fetches follower counts, post texts, likes, shares, and video links from Facebook pages.
+- **Comprehensive Data Retrieval**: Retrieves detailed information about each post, including text content, interaction metrics (likes, shares), and multimedia (e.g., video links).
+- **Flexible Handling**: Adapts to diverse post structures and multimedia content on Facebook pages, such as text posts and reels.
+- **Enhanced Access with Logged-In Scraper**: Overcomes the limits of anonymous (loginless) scraping by using Facebook account credentials for broader page access.
+- **Headless Operation**: Runs Chrome in headless mode, collecting data without opening a visible browser window.
+- **Scalability**: Suitable for monitoring multiple Facebook pages by running the scraper against each page in turn.
+- **Dependency Management**: Installs a matching ChromeDriver automatically via webdriver-manager and builds on Selenium WebDriver; compatible with Python 3.x.
+- **Ease of Use**: Straightforward initialization and a single `scrape()` call make it quick to integrate into existing workflows.
 
 ## Dependencies
 
@@ -58,4 +69,4 @@ To use MetaDataScraper, follow these steps:
 
 ## License
 
-This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
\ No newline at end of file
+This project is licensed under the Apache License, Version 2.0 - see the [LICENSE](https://github.com/ishan-surana/MetaDataScraper/blob/main/LICENCE) file for details.
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 215308e..4148da3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "MetaDataScraper"
-version = "0.0.3"
+version = "1.0.0"
 authors = [
   { name="Ishan Surana", email="ishansurana1234@gmail.com" },
 ]
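Example usage of the renamed API, as a minimal sketch: the loginless-then-logged-in fallback flow shown here is illustrative, and the page ID and credentials are placeholders.

```python
# Try the loginless scraper first and fall back to the logged-in scraper,
# since some pages are inaccessible without authentication.
from MetaDataScraper import LoginlessScraper, LoggedInScraper

page_id = "your_target_page_id"
try:
    data = LoginlessScraper(page_id).scrape()
except Exception:
    # LoggedInScraper raises a plain Exception when a page is inaccessible;
    # the loginless variant is assumed to behave similarly.
    data = LoggedInScraper(page_id, "your_facebook_email", "your_facebook_password").scrape()

# scrape() returns a dict: 'followers' plus per-post lists.
print(f"Followers: {data['followers']}")
```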