license: openrail
datasets:
- allenai/soda
language:
- en
pipeline_tag: conversational
Cadet-Tiny是什么?
受Allen AI的Cosmo-XL启发,Cadet-Tiny是一个基于SODA数据集训练的超小型对话模型,专为边缘设备推理设计(例如仅2GB内存的树莓派)。
Cadet-Tiny基于谷歌的t5-small预训练模型微调而成,其体积仅为Cosmo-3B模型的2%左右。
这是我打造的首个SEQ2SEQ自然语言处理模型!非常高兴能在HuggingFace上分享它!:)
如有疑问或改进建议,请联系:tcgoldfarb@gmail.com
Google Colab链接
这里是我训练模型和使用AI2的SODA公开数据集的Google Colab文件链接:
https://colab.research.google.com/drive/1cx3Yujr_jGQkseqzXZW-2L0vEyEjds_s?usp=sharing
快速上手Cadet-Tiny
使用以下代码片段开始体验Cadet-Tiny:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import colorful as cf
cf.use_true_colors()
cf.use_style('monokai')
class CadetTinyAgent:
    """Interactive chat agent wrapping the Cadet-Tiny seq2seq model.

    Maintains a rolling conversation history as a single string whose turns
    are delimited by " <TURN> ", and builds model prompts of the form
    "dialogue: <situation> <SEP> <role> <TURN> <history>".
    """

    def __init__(self):
        """Load the t5-small tokenizer and the Cadet-Tiny checkpoint onto
        GPU if available, otherwise CPU."""
        print(cf.bold | cf.purple("正在启动Cadet-Tiny..."))
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = AutoTokenizer.from_pretrained("t5-small", model_max_length=512)
        self.model = AutoModelForSeq2SeqLM.from_pretrained("ToddGoldfarb/Cadet-Tiny", low_cpu_mem_usage=True).to(self.device)
        # History is a string, built up by concatenation in observe().
        self.conversation_history = ""

    def observe(self, observation):
        """Append *observation* to the rolling history, trimming old text.

        Once the history exceeds 400 characters the oldest 112 characters
        are dropped, keeping the prompt within the model's 512-token
        context budget (character counts are a rough proxy for tokens).
        """
        self.conversation_history = self.conversation_history + observation
        if len(self.conversation_history) > 400:
            self.conversation_history = self.conversation_history[112:]

    def set_input(self, situation_narrative="", role_instruction=""):
        """Build the full model prompt from the optional situation/role
        strings and the current conversation history.

        Returns a string shaped like
        "dialogue: <situation> <SEP> <role> <TURN> <history>", where the
        situation and role segments are omitted when empty.
        """
        input_text = "dialogue: "
        if situation_narrative != "":
            input_text = input_text + situation_narrative
        if role_instruction != "":
            input_text = input_text + " <SEP> " + role_instruction
        input_text = input_text + " <TURN> " + self.conversation_history
        return input_text

    def generate(self, situation_narrative, role_instruction, user_response):
        """Record the user's turn, sample a model reply, record it, and
        return the reply text.

        Sampling uses temperature 0.75 with nucleus (top-p 0.95) sampling,
        so repeated calls with the same input may differ.
        """
        user_response = user_response + " <TURN> "
        self.observe(user_response)
        input_text = self.set_input(situation_narrative, role_instruction)
        inputs = self.tokenizer([input_text], return_tensors="pt").to(self.device)
        outputs = self.model.generate(inputs["input_ids"], max_new_tokens=512, temperature=0.75, top_p=.95,
                                      do_sample=True)
        cadet_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
        added_turn = cadet_response + " <TURN> "
        self.observe(added_turn)
        return cadet_response

    def reset_history(self):
        """Clear the conversation history.

        Fix: history is a string everywhere else in this class (observe()
        concatenates str onto it); the original reset assigned a list,
        which made the next observe() raise TypeError after a [RESET].
        """
        self.conversation_history = ""

    def run(self):
        """Outer REPL: run chat sessions until the user declines another."""

        def get_valid_input(prompt, default):
            # Re-prompt until the user types Y/N (either case) or presses
            # Enter, which selects *default*.
            while True:
                user_input = input(prompt)
                if user_input in ["Y", "N", "y", "n"]:
                    return user_input
                if user_input == "":
                    return default

        while True:
            continue_chat = ""
            situation_narrative = "想象你是正在与???对话的Cadet-Tiny。"
            role_instruction = "你是Cadet-Tiny,正在与???交谈。"
            self.chat(situation_narrative, role_instruction)
            continue_chat = get_valid_input(cf.purple("以新设定开始新对话?[Y/N]:"), "Y")
            if continue_chat in ["N", "n"]:
                break
        print(cf.blue("CT: 再见!"))

    def chat(self, situation_narrative, role_instruction):
        """Inner chat loop: read user lines until [END]; [RESET] clears
        the history and continues."""
        print(cf.green(
            "Cadet-Tiny运行中!输入[RESET]重置对话历史,输入[END]结束对话。"))
        while True:
            user_input = input("You: ")
            if user_input == "[RESET]":
                self.reset_history()
                print(cf.green("[对话历史已清除,开始与Cadet-Tiny聊天!]"))
                continue
            if user_input == "[END]":
                break
            response = self.generate(situation_narrative, role_instruction, user_input)
            print(cf.blue("CT: " + response))
def main():
    """Script entry point: construct the agent and hand control to its
    interactive loop."""
    print(cf.bold | cf.blue("模型加载中"))
    agent = CadetTinyAgent()
    agent.run()


if __name__ == '__main__':
    main()
致谢与引用
特别感谢Hyunwoo Kim在SODA数据集使用方面给予的指导。推荐阅读其关于SODA、Prosocial-Dialog和COSMO的研究论文:
@article{kim2022soda,
title={SODA: Million-scale Dialogue Distillation with Social Commonsense Contextualization},
author={Hyunwoo Kim and Jack Hessel and Liwei Jiang and Peter West and Ximing Lu and Youngjae Yu and Pei Zhou and Ronan Le Bras and Malihe Alikhani and Gunhee Kim and Maarten Sap and Yejin Choi},
journal={ArXiv},
year={2022},
volume={abs/2212.10465}
}