Skip to content

Commit

Permalink
Fix #54 - add spam classifier with google gemini flash 2.0 experiment…
Browse files Browse the repository at this point in the history
…al - also check for spam in more situations
  • Loading branch information
nonprofittechy committed Jan 3, 2025
1 parent f99f349 commit 16f6591
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 21 deletions.
43 changes: 25 additions & 18 deletions docassemble/GithubFeedbackForm/data/questions/feedback.yml
Original file line number Diff line number Diff line change
Expand Up @@ -311,26 +311,33 @@ need:
- package_version
- filename
code: |
if not task_performed('issue noted', persistent=True):
saved_uuid
if showifdef('would_be_on_panel', False):
add_panel_participant(panel_email)
if should_send_to_github:
issue_url
if issue_url:
if saved_uuid:
set_feedback_github_url(saved_uuid, issue_url)
else:
al_error_email
log(f"This form was not able to add an issue on the {github_user}/{github_repo} repo. Check your config.")
if al_error_email and not is_likely_spam(issue_template.content):
log(f"Unable to create issue on repo {github_repo}, falling back to emailing {al_error_email}")
send_email(to=al_error_email, subject=f"{github_repo} - {issue_template.subject_as_html(trim=True)}", template=issue_template)
else:
log(f"~~~USER FEEDBACK~~~ {github_repo} -{issue_template.subject_as_html(trim=True)} - {issue_template.content_as_html(trim=True)}")
if is_likely_spam(issue_template.content):
log("Not saving feedback because it looks like spam")
mark_task_as_performed('issue noted', persistent=True)
issue_url = None
saved_uuid = None
note_issue = False
else:
log("Already sent feedback to github from a feedback interview, not going to send again")
if not task_performed('issue noted', persistent=True):
saved_uuid
if showifdef('would_be_on_panel', False):
add_panel_participant(panel_email)
if should_send_to_github:
issue_url
if issue_url:
if saved_uuid:
set_feedback_github_url(saved_uuid, issue_url)
else:
al_error_email
log(f"This form was not able to add an issue on the {github_user}/{github_repo} repo. Check your config.")
if al_error_email and not is_likely_spam(issue_template.content):
log(f"Unable to create issue on repo {github_repo}, falling back to emailing {al_error_email}")
send_email(to=al_error_email, subject=f"{github_repo} - {issue_template.subject_as_html(trim=True)}", template=issue_template)
else:
log(f"~~~USER FEEDBACK~~~ {github_repo} -{issue_template.subject_as_html(trim=True)} - {issue_template.content_as_html(trim=True)}")
mark_task_as_performed('issue noted', persistent=True)
else:
log("Already sent feedback to github from a feedback interview, not going to send again")
note_issue = True
---
code: |
Expand Down
58 changes: 56 additions & 2 deletions docassemble/GithubFeedbackForm/github_issue.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
from urllib.parse import urlencode, quote_plus
from docassemble.base.util import log, get_config, interview_url
import re
# google-generativeai is an optional dependency: if it cannot be imported,
# `genai` stays undefined and the Gemini-based spam check degrades to
# "not spam" instead of preventing this module from loading.
try:
    import google.generativeai as genai
except Exception:  # not a bare `except:`, so SystemExit/KeyboardInterrupt still propagate
    pass

# reference: https://gist.github.com/JeffPaine/3145490
# https://docs.github.com/en/free-pro-team@latest/rest/reference/issues#create-an-issue
Expand All @@ -16,11 +20,11 @@
"make_github_issue",
"feedback_link",
"is_likely_spam",
"is_likely_spam_from_genai",
"prefill_github_issue_url",
]
# `or {}` (rather than a get_config default) also covers the case where the
# `github issues` key exists in the configuration but has no value.
USERNAME = (get_config("github issues") or {}).get("username")


def _get_token() -> Optional[str]:
    """Return the `token` entry of the `github issues` configuration, if any."""
    github_settings = get_config("github issues")
    if not github_settings:
        # Missing or empty configuration section: fall back to an empty mapping
        github_settings = {}
    return github_settings.get("token")

Expand Down Expand Up @@ -168,6 +172,55 @@ def feedback_link(
)


def is_likely_spam_from_genai(
    body: Optional[str],
    context: Optional[str] = None,
    gemini_api_key: Optional[str] = None,
    model="gemini-2.0-flash-exp",
) -> bool:
    """
    Check if the body of the issue is likely spam with the help of Google Gemini.

    This check is best-effort: if there is no API key, or if configuring or
    calling the model raises an exception, the problem is logged and the body
    is treated as not spam.

    Args:
        body (Optional[str]): the body of the issue
        context (Optional[str]): the context of the issue to help rate it as spam or not,
            defaults to a guided interview in the legal context
        gemini_api_key (Optional[str]): the API key for Google Gemini, defaults to the
            `google gemini api key` value from the global configuration
        model (str): the name of the Gemini model to ask, defaults to "gemini-2.0-flash-exp"

    Returns:
        bool: True only when the model's answer is the keyword 'spam', False otherwise
    """
    if not body:
        return False

    if not context:
        context = "a guided interview in the legal context"

    if not gemini_api_key:
        gemini_api_key = get_config("google gemini api key")

    if not gemini_api_key:
        log("Not using Google Gemini Flash to check for spam: no token provided")
        return False

    try:
        # `genai` is only referenced inside this try block so that a failed
        # import of the library is logged here instead of raising.
        genai.configure(api_key=gemini_api_key)
        classifier = genai.GenerativeModel(
            model_name=model,  # honor the caller's choice of model
            system_instruction=f"""
You are reviewing a feedback form for {context}. Your job is to allow as many
relevant feedback responses as possible while filtering out irrelevant and spam feedback,
especially targeted advertising that isn't pointing out a problem on the guided interview.
Rate the user's feedback as 'spam' or 'not spam' based on the context of the guided interview.
Answer only with the exact keywords: 'spam' or 'not spam'.
""",
        )
    except Exception as e:
        log(f"Error configuring Google Gemini Flash: {e}")
        return False

    try:
        response = classifier.generate_content(body)
        # The prompt shows the keywords in quotes, so tolerate an answer that
        # echoes the quotes, adds a trailing period, or changes the case.
        verdict = response.text.strip().strip("'\".").lower()
        return verdict == "spam"
    except Exception as e:
        log(f"Error using Google Gemini Flash: {e}")
        return False


def is_likely_spam(
body: Optional[str], keywords: Optional[List[str]] = None, filter_urls: bool = True
) -> bool:
Expand All @@ -182,6 +235,7 @@ def is_likely_spam(
keywords (Optional[List[str]]): a list of keywords that are likely spam, defaults to a set of keywords
from the global configuration under the `github issues: spam keywords` key
"""

_urls = ["leadgeneration.com", "leadmagnet.com"]
_keywords = [
"100 times more effective",
Expand Down Expand Up @@ -244,7 +298,7 @@ def is_likely_spam(
if re.search(url_regex, body):
return True

return False
return is_likely_spam_from_genai(body)


def prefill_github_issue_url(
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def find_package_data(where='.', package='', exclude=standard_exclude, exclude_d
url='https://courtformsonline.org',
packages=find_packages(),
namespace_packages=['docassemble'],
install_requires=['docassemble.ALToolbox>=0.6.0'],
install_requires=['docassemble.ALToolbox>=0.6.0', 'google-generativeai'],
zip_safe=False,
package_data=find_package_data(where='docassemble/GithubFeedbackForm/', package='docassemble.GithubFeedbackForm'),
)
Expand Down

0 comments on commit 16f6591

Please sign in to comment.