Skip to content

Commit

Permalink
Mit Colab erstellt
Browse files Browse the repository at this point in the history
  • Loading branch information
soberbichler committed Jan 13, 2025
1 parent f0e27f4 commit 55bb0be
Showing 1 changed file with 58 additions and 1 deletion.
59 changes: 58 additions & 1 deletion Named_Entity_Recognition_ImpressoAPI.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,63 @@
"id": "ri-HJXd2rl65"
}
},
{
"cell_type": "code",
"source": [
"import pandas as pd\n",
"import time\n",
"from typing import Optional\n",
"\n",
def process_text_with_ner(
    text: str,
    max_retries: int = 3,
    delay: int = 2,
    request_delay: float = 1,
) -> Optional[dict]:
    """Run the Impresso NER tool on *text*, retrying failed requests.

    Args:
        text: The text to annotate. Missing values (``None``/``NaN``, as
            judged by ``pd.isna``) are skipped without contacting the API.
        max_retries: Total number of request attempts before giving up.
        delay: Seconds to wait after the first failed attempt; the wait
            doubles after every further failure.
        request_delay: Seconds to pause before the first attempt so that
            consecutive calls are spaced out.

    Returns:
        Whatever ``impresso_session.tools.ner`` returns on the first
        successful attempt, or ``None`` if the text is missing or every
        attempt failed.
    """
    if pd.isna(text):
        return None

    # Consistent pause before each request to space out consecutive calls.
    time.sleep(request_delay)

    last_error = None
    for attempt in range(1, max_retries + 1):
        try:
            return impresso_session.tools.ner(text=text)
        except Exception as e:
            # Deliberately broad: the concrete exception types raised by the
            # client are not known here, and one bad request must not abort
            # a whole batch. The error is reported rather than swallowed.
            last_error = e
            if attempt < max_retries:
                print(f"NER request failed (attempt {attempt}/{max_retries}): {e!r}")
                print(f"Retrying in {delay} seconds...")
                time.sleep(delay)
                delay *= 2

    if last_error is not None:
        # Make the final failure visible so a None result can be told apart
        # from a row that was skipped because its text was missing.
        print(f"NER request failed after {max_retries} attempts: {last_error!r}")
    return None
"\n",
def process_dataframe(df: pd.DataFrame, text_column: str = 'extracted_article_clean') -> pd.DataFrame:
    """Annotate every row of *df* with NER output.

    Each value of ``text_column`` is passed to ``process_text_with_ner``;
    the outcomes are stored in a new ``'ner_results'`` column. The frame is
    modified in place and the same object is returned.

    Args:
        df: Frame holding the texts to annotate.
        text_column: Name of the column that contains the text.

    Returns:
        ``df`` itself, with the ``'ner_results'`` column added. Rows whose
        processing raised an unexpected error get ``None``.

    Raises:
        KeyError: If ``text_column`` is not a column of ``df``.
    """
    # Validate once up front instead of on every row; this also catches a
    # wrong column name when the frame has no rows.
    if text_column not in df.columns:
        print(f"Column '{text_column}' not found. Available columns: {df.columns}")
        raise KeyError(text_column)

    total = len(df)
    results = []
    # Count positions with enumerate: index labels may be non-numeric or
    # non-contiguous (e.g. after filtering), so they cannot be used for
    # progress arithmetic.
    for position, text in enumerate(df[text_column], start=1):
        print(f"Processing row {position}/{total}")
        try:
            results.append(process_text_with_ner(text))
        except Exception as e:
            # Keep the batch going; a failed row is recorded as None so the
            # result list stays aligned with the frame's rows.
            print(f"Error processing row {position}: {str(e)}")
            results.append(None)

    df['ner_results'] = results
    return df
"\n",
# Run NER over every article, reading the text from 'extracted_article_clean'.
articles_df = process_dataframe(articles_df, text_column='extracted_article_clean')
],
"metadata": {
"id": "UYW6HMFDdFB3"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
Expand Down Expand Up @@ -162,7 +219,7 @@
"print(articles_df[['extracted_article_clean', 'places']].head())"
],
"metadata": {
"id": "UYW6HMFDdFB3"
"id": "Xi7NMYrAvGD9"
},
"execution_count": null,
"outputs": []
Expand Down

0 comments on commit 55bb0be

Please sign in to comment.