Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

init #1

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 178 additions & 0 deletions judgement-text-annotator/__init__.py
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. __init__ 模块一般只写导入信息,不应包含业务代码;业务代码放在 __main__.py 模块中更佳。
  2. 建议将代码移到 judgement-text-annotator/judgement_text_annotator/__main__.py,这样在项目根目录下即可通过 python -m judgement_text_annotator 运行。

Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
import json
from datetime import datetime
from docx import Document
import gradio as gr
import csv
import os
import tempfile
from io import StringIO
from dotenv import load_dotenv

from openai import OpenAI

# Load settings from a .env file (python-dotenv) before reading the environment.
load_dotenv()

# Deepseek API
# API key taken from the DEEPSEEK_API_KEY environment variable; None when unset.
API_key = os.getenv("DEEPSEEK_API_KEY")

# Paths of the CSV files written by download_csv(); it appends one per call
# and the list is never cleared in this module.
file_path = []

# Latest extraction result. Starts as an empty string; mark_judgment() rebinds
# it to a list of [key, value] rows that download_csv() later iterates.
mark_judgment_info = ""
def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if present.

    Chat models often wrap JSON answers in a fenced block (optionally tagged
    ``json``) even when told not to, and ``json.loads`` rejects that wrapper.
    """
    stripped = text.strip()
    if not stripped.startswith("```"):
        return stripped
    # Drop the opening fence line (it may carry a language tag), then the
    # closing fence if there is one.
    _, _, remainder = stripped.partition("\n")
    remainder = remainder.rstrip()
    if remainder.endswith("```"):
        remainder = remainder[:-3]
    return remainder.strip()


def extract_info_from_judgment(judgment_text):
    """Ask the Deepseek chat model to extract structured fields from a judgment.

    The judgment text is appended to a fixed field template and sent to the
    ``deepseek-chat`` model; the reply is expected to be a JSON object.

    Args:
        judgment_text: Full text of the legal document.

    Returns:
        The dict decoded from the model's JSON reply. An empty dict is
        returned when the input is blank, when the API call fails, or when
        the reply is not a JSON object.
    """
    # Nothing to extract: skip the network round-trip entirely.
    if not judgment_text or not judgment_text.strip():
        return {}

    system_prompt = """
角色:
你是一位专业且经验丰富的法律文书信息提取助手。

背景:
我们有一系列交通事故相关的法律文书,需要从中提取关键信息。

知识:
法律文书包含了诸如案件类型、文书ID、案件名称、案件编号、裁判日期等多方面的信息。

任务:
仔细阅读输入的法律文书,按照规定的JSON格式准确提取各项信息。若文书中未提及某项信息,则对应字段填写“空”。

其他:
JSON格式不用加上```json```标记,直接填写即可。

"""

    user_prompt = """请从以下法律文书中提取相关信息:
"案件类型": "空",
"文书ID": "空",
"案件名称一": "空",
"案件名称二": "空",
"案件编号": "空",
"裁判日期": "空",
"法院名称": "空",
"肇事人": "空",
"性别": "空",
"出生日期": "空",
"民族": "空",
"文化程度": "空",
"户籍所在地": "空",
"案发时间": "空",
"车辆品牌和车型": "空",
"事故发生地": "空",
"酒精": "空",
"伤亡数量": "空",
"驾照实习期开始": "空",
"驾驶实习期结束": "空",
"驾照类型": "空",
"实习期类型": "空",
"经济损失": "空",
"撤销案件号": "空",
"维持案件号": "空"
""" + judgment_text

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    try:
        # Client construction sits inside the try so that a missing or invalid
        # API key is reported like any other request failure instead of
        # escaping into the UI handler.
        client = OpenAI(api_key=API_key, base_url="https://api.deepseek.com")
        completion = client.chat.completions.create(
            model="deepseek-chat",
            messages=messages,
        )
        print(completion)
        content = completion.choices[0].message.content if completion.choices else ""
    except Exception as e:
        # Best-effort boundary: any API/transport failure degrades to "no data".
        print(f"An error occurred: {e}")
        return {}

    try:
        # The message content may be None; treat that as an empty reply.
        info_dict = json.loads(_strip_code_fence(content or ""))
    except json.JSONDecodeError as e:
        print(f"解析JSON时出错: {e}")
        return {}

    # Callers iterate over .items(), so anything but a JSON object is unusable.
    if not isinstance(info_dict, dict):
        print(f"解析JSON时出错: 期望JSON对象, 实际为 {type(info_dict).__name__}")
        return {}
    return info_dict

def mark_judgment(judgment_text):
    """Extract fields from *judgment_text* and return them as table rows.

    An empty string yields an empty list without touching the stored result.
    Otherwise the extracted dict is flattened into ``[key, value]`` rows,
    stored in the module-level ``mark_judgment_info``, and returned.
    """
    global mark_judgment_info
    if judgment_text != "":
        extracted = extract_info_from_judgment(judgment_text)
        mark_judgment_info = [list(pair) for pair in extracted.items()]
        return mark_judgment_info
    return []

def download_csv():
    """Write the stored extraction rows to a timestamped CSV in the temp dir.

    Returns:
        The module-level ``file_path`` list after the new file's path has
        been appended to it.
    """
    # Render every row in memory first, so a malformed row fails before any
    # file is created.
    buffer = StringIO()
    csv.writer(buffer).writerows(
        [key, value] for key, value in mark_judgment_info
    )

    # Save to a temporary file named after the current time.
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    csv_name = f"marked_judgment_{stamp}.csv"
    target = os.path.join(tempfile.gettempdir(), csv_name)

    with open(target, 'w', newline='', encoding='utf-8') as handle:
        handle.write(buffer.getvalue())

    file_path.append(target)
    return file_path

def clear_input():
    """Return the reset values: an empty string and a fresh empty list."""
    blank_text = ""
    blank_rows = []
    return blank_text, blank_rows

def read_docx(file):
    """Return the paragraph texts of the document at ``file.name``.

    The paragraphs are joined with newlines in document order.
    """
    document = Document(file.name)
    return '\n'.join(para.text for para in document.paragraphs)

def read_uploaded_file(file):
    """Return the text content of an uploaded file.

    Args:
        file: Object exposing the upload's path as ``file.name``, or ``None``
            when nothing is uploaded.

    Returns:
        An empty string for ``None``; the joined paragraph text for Word
        documents; otherwise the file decoded as UTF-8, or a user-facing
        error message when the bytes are not valid UTF-8.
    """
    if file is None:
        return ""
    print(file.name)

    # Compare the extension case-insensitively so "X.DOCX" is not mistaken
    # for a plain-text file and reported as an encoding error.
    # NOTE(review): python-docx presumably cannot open legacy binary .doc
    # files, so such uploads may still fail inside read_docx — confirm.
    if file.name.lower().endswith((".doc", ".docx")):
        return read_docx(file)

    try:
        with open(file.name, 'r', encoding='utf-8') as f:
            return f.read()
    except UnicodeDecodeError:
        return "读取文件时出现编码错误,请检查文件编码是否正确"

def main():
    """Build the Gradio interface, wire its events to the handlers, and launch it."""
    # NOTE(review): the nesting of the layout containers below was
    # reconstructed from a copy whose indentation was lost — confirm it
    # matches the intended layout.
    with gr.Blocks() as demo:
        gr.Markdown("输入判决书文本内容,系统将输出标记后的信息。")

        with gr.Row():
            # Input side: judgment text plus the submit/clear buttons.
            with gr.Column():
                input_text = gr.Textbox(
                    lines=10,
                    placeholder="请输入判决书文本内容...",
                    label="",
                    interactive=True
                )
                with gr.Row():
                    submit_btn = gr.Button("提交")
                    clear_btn = gr.Button("清除")
            # Output side: extracted key/value table and the CSV export button.
            with gr.Column():
                output_boxes = gr.DataFrame(headers=["Key", "Value"], visible=True, wrap=True, interactive=True)
                download_btn = gr.Button("转为CSV", elem_id="download-btn")
        with gr.Row():
            upload_file = gr.File(label="上传判决书", elem_id="upload-file")

        # Uploading a file fills the text box with that file's content.
        upload_file.change(fn=read_uploaded_file, inputs=upload_file, outputs=input_text)
        # Submitting sends the text through extraction and shows the rows.
        submit_btn.click(fn=mark_judgment, inputs=input_text, outputs=output_boxes)
        # Clearing resets both the text box and the table.
        clear_btn.click(fn=clear_input, outputs=[input_text, output_boxes])
        # The CSV output component is created inline here rather than in the
        # layout section above.
        download_btn.click(fn=download_csv, outputs=gr.File(label="CSV文件"))

    demo.launch()

# Start the UI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
1 change: 0 additions & 1 deletion python_package/__init__.py

This file was deleted.