---
license: apache-2.0
datasets:
- yuyouyu/BeyondDialogue
language:
- zh
- en
metrics:
- character
base_model: mistralai/Mistral-Nemo-Instruct-2407
pipeline_tag: question-answering
tags:
- text-generation-inference
- role-playing
---

# Mistral-Nemo-BD-RP
## Introduction 🎉

Mistral-Nemo-BD-RP is a large language model (LLM) fine-tuned on the BeyondDialogue dataset and designed for role-playing scenarios. The model generates high-quality dialogue responses in both Chinese and English across a wide range of role-playing settings.

For more details, please refer to our paper and GitHub.
## Training details 🚀

We fully fine-tuned Mistral-Nemo-Instruct-2407 for 3 epochs (833 steps in total) with a global batch size of 128. The training sequence length was set to 4,096 and the learning rate to 3e-5. The training data comes from the BeyondDialogue dataset.
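For reference, here is a minimal sketch of how these hyperparameters could be expressed with Hugging Face `TrainingArguments`. The per-device batch size, accumulation steps, GPU count, and precision below are assumptions used only to illustrate the reported global batch size of 128; the actual training stack is not released in this card.

```python
# Hypothetical mapping of the reported hyperparameters onto TrainingArguments.
# The device split (8 per device x 4 accumulation x 4 GPUs = 128 global) and
# bf16 precision are assumptions, not the authors' released configuration.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="mistral-nemo-bd-rp",
    num_train_epochs=3,              # 3 epochs, 833 steps in total
    per_device_train_batch_size=8,   # assumed per-device size
    gradient_accumulation_steps=4,   # assumed; yields a global batch of 128 on 4 GPUs
    learning_rate=3e-5,              # reported learning rate
    bf16=True,                       # assumed mixed precision
    save_strategy="epoch",
)
# The 4,096 training sequence length would be enforced at tokenization time,
# e.g. tokenizer(..., truncation=True, max_length=4096).
```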
## Requirements 📝

The code for Mistral-Nemo has been integrated into the latest Hugging Face transformers library, so we advise you to install `transformers>=4.42.0`:

```bash
pip install "transformers>=4.42.0"
```
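If you are unsure whether your environment meets this requirement, a quick runtime check (a convenience snippet added here, not part of the original instructions) is:

```python
# Check that the installed transformers version is recent enough for Mistral-Nemo.
import transformers
from packaging import version  # packaging ships as a transformers dependency

if version.parse(transformers.__version__) < version.parse("4.42.0"):
    raise RuntimeError(
        f"transformers {transformers.__version__} is too old; "
        "run: pip install 'transformers>=4.42.0'"
    )
```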
## Quickstart 💥

The following code snippet shows how to load the model with `pipeline` and generate content; an equivalent variant that calls `apply_chat_template` directly is shown after the example:

```python
from transformers import pipeline

chatbot = pipeline("text-generation", model="yuyouyu/Mistral-Nemo-BD-RP", device_map="auto")
system_prompt_temp = """Please answer questions as {role_name}, assuming that you live in the world of {world} and imitating {role_name}'s personality and speaking style. Use the tone, manner, and vocabulary that {role_name} would use. Do not reveal that you are an AI or a language model; you must always remember that you are {role_name}.
{role_name}'s character traits: {character}
{role_name}'s MBTI personality type: {MBTI}
{role_name}'s speaking style: {style}
Current scene:
{scene}
Role's emotion (0-10, the higher the value, the stronger the emotion):
{emotion}
Now, please act as {role_name} and reply to {chat_role} with one short sentence. Your intimacy with the other party is {relationship} (0-10, the higher the value, the closer the relationship). Accurately reflect the assigned MBTI personality, character traits, speaking style, and emotion."""
role_name = "Hamlet"
world = "the royal court of 8th-century Denmark"
character = "extreme, domineering, decisive"
MBTI = "Extraverted (E), Intuitive (N), Feeling (F), Judging (J)"
style = "indecisive, decisive, sentimental"
scene = "Inside the grand hall of Elsinore Castle, flickering torchlight illuminates the space as Hamlet paces anxiously and Elena conjures ethereal visions of Denmark's landscapes. Luxurious tapestries and opulent furniture surround them, yet Hamlet's gaze stays locked on Elena's illusions. She gracefully weaves dissonance into the fabric of reality, driving Hamlet to clutch his chest in an existential crisis. Unspoken love and inner turmoil hang in the air, thick with tension and anticipation."
emotion = "Happiness: 1, Sadness: 8, Disgust: 5, Fear: 7, Surprise: 6, Anger: 4"
chat_role = "Elena"
relationship = "7"
system_prompt = system_prompt_temp.format(
role_name=role_name,
world=world,
character=character,
MBTI=MBTI,
style=style,
scene=scene,
emotion=emotion,
chat_role=chat_role,
relationship=relationship
)
prompt = "Oh, dear Hamlet, can you glimpse the path not yet revealed in these whispering phantoms? Speak, for the shadows may guide us to the truth within your tormented soul."
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt}
]
response = chatbot(messages, max_new_tokens=256, pad_token_id=chatbot.tokenizer.eos_token_id, do_sample=True, temperature=0.7)[0]['generated_text'][-1]['content']
```
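Since the lead-in mentions `apply_chat_template`, here is an equivalent sketch that loads the tokenizer and model explicitly instead of going through `pipeline`. The generation settings mirror the example above; this variant is our illustration rather than a second official snippet:

```python
# Equivalent generation without pipeline(), applying the chat template directly.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "yuyouyu/Mistral-Nemo-BD-RP"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")

# Render the same system/user messages into the model's chat format.
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(text, return_tensors="pt").to(model.device)

output_ids = model.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.7)
# Decode only the newly generated tokens, skipping the prompt.
response = tokenizer.decode(output_ids[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
```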
> [!IMPORTANT]
> **Note:** The example above is an English role-play. For Chinese examples, please refer to our other trained model repository, Qwen2-7B-BD-RP.
## Evaluation Results 🏆

We evaluated eight dimensions with objective questions: character fidelity, style compatibility, emotion deviation, relationship deviation, personality traits, human-likeness, coherence, and role choice. The metric design is detailed in our paper, and the evaluation code is available on GitHub. The results are as follows:
| Model | Character ↑ | Style ↑ | Emotion ↓ | Relationship ↓ | Personality ↑ | Avg. ↑ | Human-likeness ↑ | Role Choice ↑ | Coherence ↑ |
|---|---|---|---|---|---|---|---|---|---|
| **General Baselines (Closed-source)** | | | | | | | | | |
| GPT-4o | 74.32 ± 1.15 | 81.67 ± 1.51 | 16.31 ± 0.48 | 12.13 ± 0.66 | 66.58 ± 4.41 | 78.83 ± 1.64 | 67.33 ± 3.95 | 87.33 ± 3.86 | 99.67 ± 0.33 |
| GPT-3.5-Turbo | 72.26 ± 1.27 | 73.66 ± 1.73 | 17.79 ± 0.56 | 14.17 ± 0.73 | 66.92 ± 4.85 | 76.18 ± 1.83 | 33.33 ± 4.43 | 83.00 ± 4.68 | 97.33 ± 1.17 |
| Moonshot-v1-8k | 74.06 ± 1.19 | 80.64 ± 1.51 | 16.17 ± 0.47 | 13.42 ± 0.70 | 67.00 ± 4.87 | 78.42 ± 1.75 | 44.00 ± 4.33 | 86.67 ± 3.75 | 99.33 ± 0.46 |
| Yi-Large-Turbo | 75.13 ± 1.22 | 79.18 ± 1.58 | 16.44 ± 0.49 | 13.48 ± 0.67 | 68.25 ± 4.61 | 78.53 ± 1.72 | 47.00 ± 4.60 | 84.33 ± 3.67 | 92.67 ± 2.39 |
| Deepseek-Chat | 75.46 ± 1.14 | 81.49 ± 1.51 | 15.92 ± 0.46 | 12.42 ± 0.63 | 67.92 ± 4.57 | 79.30 ± 1.66 | 52.33 ± 4.95 | 83.00 ± 4.68 | 96.67 ± 1.00 |
| Baichuan4 | 71.82 ± 1.25 | 76.92 ± 1.52 | 17.57 ± 0.52 | 12.30 ± 0.62 | 67.08 ± 4.75 | 77.19 ± 1.73 | 45.33 ± 4.31 | 82.33 ± 4.49 | 99.33 ± 0.46 |
| Hunyuan | 73.77 ± 1.18 | 78.75 ± 1.56 | 17.24 ± 0.48 | 13.22 ± 0.68 | 67.00 ± 4.39 | 77.81 ± 1.66 | 53.00 ± 4.29 | 84.33 ± 4.52 | 98.33 ± 0.84 |
| **Role-play Expertise Baselines** | | | | | | | | | |
| Index-1.9B-Character | 73.33 ± 1.32 | 76.48 ± 1.50 | 17.99 ± 0.53 | 13.58 ± 0.71 | 66.33 ± 4.57 | 76.92 ± 1.73 | 21.67 ± 3.96 | 78.67 ± 5.14 | 69.67 ± 3.85 |
| CharacterGLM-6B | 73.36 ± 1.28 | 76.08 ± 1.55 | 18.58 ± 0.55 | 14.27 ± 0.79 | 67.33 ± 4.34 | 76.79 ± 1.70 | 16.00 ± 2.38 | 81.00 ± 4.40 | 25.67 ± 3.48 |
| Baichuan-NPC-Turbo | 75.19 ± 1.23 | 79.15 ± 1.38 | 17.24 ± 0.51 | 13.10 ± 0.69 | 65.33 ± 4.84 | 77.87 ± 1.73 | 56.00 ± 4.66 | 86.33 ± 4.90 | 99.00 ± 0.56 |
| **General Baselines (Open-source)** | | | | | | | | | |
| Yi-1.5-9B-Chat | 75.31 ± 1.20 | 76.78 ± 1.49 | 16.67 ± 0.52 | 12.75 ± 0.66 | 67.42 ± 4.63 | 78.02 ± 1.70 | 38.67 ± 4.39 | 84.00 ± 4.61 | 92.67 ± 1.79 |
| GLM-4-9b-chat | 74.26 ± 1.19 | 78.40 ± 1.55 | 17.18 ± 0.50 | 14.48 ± 0.74 | 67.17 ± 4.93 | 77.63 ± 1.78 | 47.67 ± 4.25 | 83.33 ± 4.51 | 99.33 ± 0.46 |
| Qwen2-7B-Instruct | 75.39 ± 1.13 | 77.68 ± 1.65 | 17.64 ± 0.56 | 13.43 ± 0.70 | 67.75 ± 4.44 | 77.95 ± 1.70 | 48.00 ± 4.66 | 83.33 ± 4.48 | 99.00 ± 0.56 |
| Mistral-Nemo-Instruct-2407 | 74.12 ± 1.17 | 77.04 ± 1.48 | 17.00 ± 0.43 | 13.50 ± 0.67 | 67.00 ± 4.30 | 77.53 ± 1.61 | 53.67 ± 4.66 | 82.67 ± 4.77 | 74.33 ± 3.77 |
| **Mistral-Nemo-BD-RP** | 74.58 ± 1.28 | 78.47 ± 1.45 | 16.62 ± 0.48 | 11.38 ± 0.67* | 69.08 ± 4.46 | 78.83 ± 1.67 | 59.00 ± 4.46 | 87.00 ± 4.73 | 92.67 ± 1.59 |
## Citation 📖

Please cite our work if you find the resources in this repository helpful:

```bibtex
@article{yu2024beyond,
  title   = {BEYOND DIALOGUE: A Profile-Dialogue Alignment Framework Towards General Role-Playing Language Model},
  author  = {Yu, Yeyong and Yu, Runsheng and Wei, Haojie and Zhang, Zhanqiu and Qian, Quan},
  year    = {2024},
  journal = {arXiv preprint arXiv:2408.10903},
}
```
## Acknowledgements 🥰

We sincerely thank Tencent TiMi Studio Group for their invaluable support of this project. Their contributions and encouragement have been crucial to its success.