generated from quanttide/quanttide-example-of-python-package
-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
init #1
Open
jiahaoshao
wants to merge
6
commits into
quanttide:main
Choose a base branch
from
jiahaoshao:main
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
init #1
Changes from 4 commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,178 @@ | ||
import json | ||
from datetime import datetime | ||
from docx import Document | ||
import gradio as gr | ||
import csv | ||
import os | ||
import tempfile | ||
from io import StringIO | ||
from dotenv import load_dotenv | ||
|
||
from openai import OpenAI | ||
|
||
load_dotenv() | ||
|
||
# DeepSeek API key, read from the environment variable DEEPSEEK_API_KEY.
API_key = os.getenv("DEEPSEEK_API_KEY")

# Paths of the CSV files exported so far; download_csv() appends to this list.
file_path = []

# [key, value] rows of the most recent extraction. mark_judgment() replaces
# it and download_csv() reads it, so it starts out as an empty list (not a
# string) to keep one consistent type for the whole life of the process.
mark_judgment_info = []
def _parse_json_object(content):
    """Parse the model reply into a dict, tolerating a Markdown code fence.

    Returns an empty dict when the reply is not valid JSON or when it decodes
    to anything other than a JSON object.
    """
    text = (content or "").strip()
    # The system prompt asks for bare JSON, but a reply may still arrive
    # wrapped in a ```json ... ``` fence; strip the fence before parsing.
    if text.startswith("```"):
        _, _, text = text.partition("\n")
        text = text.strip()
        if text.endswith("```"):
            text = text[:-3].strip()
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError as e:
        print(f"解析JSON时出错: {e}")
        return {}
    # Callers iterate over .items(), so anything but an object is unusable.
    return parsed if isinstance(parsed, dict) else {}


def extract_info_from_judgment(judgment_text):
    """Ask the DeepSeek chat model to extract structured fields from a judgment.

    Args:
        judgment_text: Full text of the legal document; it is appended to the
            user prompt that lists the fields to extract.

    Returns:
        A dict mapping field names to extracted values, or an empty dict when
        the request fails or the reply cannot be parsed as a JSON object.
    """
    # NOTE(review): the prompt lines are flush-left because the indentation
    # inside the original string literals could not be recovered - confirm
    # against the original file if exact prompt whitespace matters.
    system_prompt = """
角色:
你是一位专业且经验丰富的法律文书信息提取助手。

背景:
我们有一系列交通事故相关的法律文书,需要从中提取关键信息。

知识:
法律文书包含了诸如案件类型、文书ID、案件名称、案件编号、裁判日期等多方面的信息。

任务:
仔细阅读输入的法律文书,按照规定的JSON格式准确提取各项信息。若文书中未提及某项信息,则对应字段填写“空”。

其他:
JSON格式不用加上```json```标记,直接填写即可。

"""

    user_prompt = """请从以下法律文书中提取相关信息:
"案件类型": "空",
"文书ID": "空",
"案件名称一": "空",
"案件名称二": "空",
"案件编号": "空",
"裁判日期": "空",
"法院名称": "空",
"肇事人": "空",
"性别": "空",
"出生日期": "空",
"民族": "空",
"文化程度": "空",
"户籍所在地": "空",
"案发时间": "空",
"车辆品牌和车型": "空",
"事故发生地": "空",
"酒精": "空",
"伤亡数量": "空",
"驾照实习期开始": "空",
"驾驶实习期结束": "空",
"驾照类型": "空",
"实习期类型": "空",
"经济损失": "空",
"撤销案件号": "空",
"维持案件号": "空"
""" + judgment_text

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    client = OpenAI(api_key=API_key, base_url="https://api.deepseek.com")

    # Any failure of the remote call or of reading the reply is reported and
    # turned into an empty result so the UI callback never raises.
    try:
        completion = client.chat.completions.create(
            model="deepseek-chat",
            messages=messages,
        )
        content = completion.choices[0].message.content if completion.choices else ""
        return _parse_json_object(content)
    except Exception as e:
        print(f"An error occurred: {e}")
        return {}
|
||
def mark_judgment(judgment_text):
    """Extract the key fields of a judgment and return them as table rows.

    The rows are also stored in the module-level ``mark_judgment_info`` so
    that a later CSV export writes exactly what was last returned.

    Args:
        judgment_text: Text of the judgment. ``None``, an empty string, or a
            whitespace-only string is treated as "no input".

    Returns:
        A list of ``[key, value]`` pairs; empty when there is no input or
        nothing could be extracted.
    """
    global mark_judgment_info
    if not judgment_text or not judgment_text.strip():
        # Reset the stored rows too; otherwise a CSV export after an empty
        # submission would still contain the previous document's fields.
        mark_judgment_info = []
        return []
    info_dict = extract_info_from_judgment(judgment_text)
    if not isinstance(info_dict, dict):
        # Defensive: only a mapping can be turned into key/value rows.
        info_dict = {}
    mark_judgment_info = [[key, value] for key, value in info_dict.items()]
    return mark_judgment_info
|
||
def download_csv():
    """Write the most recent extraction result to a CSV file in the temp dir.

    Each ``[key, value]`` row of the module-level ``mark_judgment_info`` is
    written as one CSV row to ``marked_judgment_<timestamp>.csv``.

    Returns:
        The module-level ``file_path`` list, holding the path of every CSV
        exported so far, including the one just written.
        NOTE(review): the whole history is returned, not only the newest
        file - confirm that this is what the download component should show.
    """
    # Save to a temporary file named after the current second.
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    temp_file_path = os.path.join(tempfile.gettempdir(), f"marked_judgment_{timestamp}.csv")

    # newline='' lets the csv module control line endings itself.
    with open(temp_file_path, 'w', newline='', encoding='utf-8') as f:
        csv.writer(f).writerows(mark_judgment_info)

    # Two exports within the same second reuse the same file name; do not
    # list that path twice.
    if temp_file_path not in file_path:
        file_path.append(temp_file_path)

    return file_path
|
||
def clear_input():
    """Return the cleared values: an empty string and an empty row list."""
    empty_text = ""
    empty_rows = []
    return empty_text, empty_rows
|
||
def read_docx(file):
    """Return the text of every paragraph in a Word document, one per line.

    Args:
        file: Object whose ``name`` attribute is the path of the document.
    """
    document = Document(file.name)
    return '\n'.join(para.text for para in document.paragraphs)
|
||
def read_uploaded_file(file):
    """Return the text content of an uploaded file.

    Word documents (``.doc`` / ``.docx``, matched case-insensitively) are
    read through ``read_docx``; every other file is read as UTF-8 text.

    Args:
        file: Uploaded file object exposing its path as ``file.name``, or
            ``None`` when nothing is uploaded.

    Returns:
        The file's text, ``""`` when ``file`` is ``None``, or a user-facing
        error message when the file cannot be decoded or parsed.
    """
    if file is None:
        return ""

    if file.name.lower().endswith((".doc", ".docx")):
        try:
            return read_docx(file)
        except Exception:
            # Broad on purpose: this is a UI callback, and the Word parser
            # rejects legacy binary .doc files and corrupt archives with its
            # own exception types. Report it like the encoding error below
            # instead of letting the callback crash.
            return "读取Word文件失败,请确认文件为有效的docx格式"

    try:
        with open(file.name, 'r', encoding='utf-8') as f:
            return f.read()
    except UnicodeDecodeError:
        return "读取文件时出现编码错误,请检查文件编码是否正确"
|
||
def main():
    """Build the Gradio interface for judgment annotation and launch it.

    The page has a text box for the judgment, submit/clear buttons, a
    Key/Value results table, a CSV export button and a file upload widget.

    NOTE(review): the nesting of the layout containers is reconstructed from
    a copy of the source whose indentation was lost - confirm it against the
    original file.
    """
    with gr.Blocks() as app:
        gr.Markdown("输入判决书文本内容,系统将输出标记后的信息。")

        with gr.Row():
            with gr.Column():
                judgment_box = gr.Textbox(
                    lines=10,
                    placeholder="请输入判决书文本内容...",
                    label="",
                    interactive=True,
                )
                with gr.Row():
                    submit_button = gr.Button("提交")
                    clear_button = gr.Button("清除")
            with gr.Column():
                result_table = gr.DataFrame(
                    headers=["Key", "Value"],
                    visible=True,
                    wrap=True,
                    interactive=True,
                )
                csv_button = gr.Button("转为CSV", elem_id="download-btn")
        with gr.Row():
            uploader = gr.File(label="上传判决书", elem_id="upload-file")

        # Event wiring: an upload fills the text box, submit fills the table,
        # clear resets both, and the CSV button yields a downloadable file.
        uploader.change(fn=read_uploaded_file, inputs=uploader, outputs=judgment_box)
        submit_button.click(fn=mark_judgment, inputs=judgment_box, outputs=result_table)
        clear_button.click(fn=clear_input, outputs=[judgment_box, result_table])
        csv_button.click(fn=download_csv, outputs=gr.File(label="CSV文件"))

    app.launch()


if __name__ == "__main__":
    main()
This file was deleted.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`__init__` 模块一般只写导入信息,不放业务代码;业务代码放在 `__main__.py` 模块更佳,即 `judgement-text-annotator/judgement_text_annotator/__main__.py`,这样在项目根目录下可以通过 `python -m judgement_text_annotator` 运行。