Skip to content

Commit

Permalink
trying more data collection from google scholar
Browse files Browse the repository at this point in the history
  • Loading branch information
jakeberv committed Aug 18, 2024
1 parent f9a9ddf commit 90dc470
Showing 1 changed file with 32 additions and 2 deletions.
34 changes: 32 additions & 2 deletions fetch_scholar_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,43 @@ def switch_proxy():
pg.FreeProxies()
scholarly.use_proxy(pg)

# Function to fetch publication details with retry mechanism
def fetch_publication_details(pub, max_attempts=3, retry_delay=1):
    """Fill in one publication via ``scholarly`` and summarise it.

    After every failed attempt the error is printed, the proxy is rotated
    with ``switch_proxy()`` and the function sleeps before trying again.

    Args:
        pub: Publication entry, passed unchanged to ``scholarly.fill``.
        max_attempts: Number of attempts before giving up (default 3).
        retry_delay: Seconds to sleep after a failed attempt (default 1).

    Returns:
        A dict with the keys ``title``, ``citations``, ``year``, ``venue``
        and ``url`` (absent fields fall back to ``'N/A'``, or ``0`` for
        ``citations``), or ``None`` when every attempt failed.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            pub_filled = scholarly.fill(pub)  # Fill in the details of the publication
            bib = pub_filled['bib']
            return {
                'title': bib.get('title', 'N/A'),
                'citations': pub_filled.get('num_citations', 0),
                'year': bib.get('pub_year', 'N/A'),
                'venue': bib.get('venue', 'N/A'),
                'url': pub_filled.get('pub_url', 'N/A'),
            }
        except Exception as e:
            # Deliberately broad: any failure in the attempt (including a
            # missing 'bib' key) is reported and retried on a new proxy.
            print(f"Error fetching data for publication, attempt {attempt}: {e}")
            switch_proxy()  # Switch to a new proxy
            time.sleep(retry_delay)  # Wait a bit before retrying
    print(f"Failed to fetch publication data after {max_attempts} attempts.")
    return None

# Search for your profile using your Google Scholar ID and load every section
# the report needs (including 'publications') in a single fill() call.
try:
    author = scholarly.search_author_id('cQQaGZQAAAAJ')
    author = scholarly.fill(
        author,
        sections=['basics', 'indices', 'coauthors', 'counts', 'publications', 'public_access'],
    )
    print(f"Fetched basic info for author: {author.get('name', 'Unknown')}")
except Exception as e:
    print(f"Error fetching author data: {e}")
    # exit() is a convenience helper installed by the site module for
    # interactive use; raising SystemExit does not depend on it.
    raise SystemExit(1)

# Gather details for every publication, dropping entries whose fetch came back empty
publications_data = list(
    filter(None, map(fetch_publication_details, author.get('publications', [])))
)

# Extract and print the desired data
scholar_data = {
'name': author.get('name', 'N/A'),
Expand All @@ -39,7 +67,9 @@ def switch_proxy():
'profile_picture': author.get('url_picture', 'N/A'),
'homepage': author.get('homepage', 'N/A'),
'organization': author.get('organization', 'N/A'),
'public_access': author.get('public_access', 'N/A')
'public_access': author.get('public_access', 'N/A'),
'total_publications': len(author.get('publications', [])),
'publications': publications_data # Add detailed data for each publication
}

# Print the gathered information for debugging purposes
Expand Down

0 comments on commit 90dc470

Please sign in to comment.