mutate
1.0.0
ไลบรารีสำหรับสังเคราะห์ชุดข้อมูลข้อความโดยใช้แบบจำลองภาษาขนาดใหญ่ (LLM) โดย Mutate จะอ่านตัวอย่างในชุดข้อมูล แล้วสร้างตัวอย่างที่คล้ายกันด้วยพรอมต์แบบ few-shot ที่สร้างขึ้นโดยอัตโนมัติ
pip install mutate-nlp
หรือ
pip install git+https://github.com/infinitylogesh/mutate
from mutate import pipeline
pipe = pipeline ( "text-classification-synthesis" ,
model = "EleutherAI/gpt-neo-2.7B" ,
device = 1 )
task_desc = "Each item in the following contains movie reviews and corresponding sentiments. Possible sentimets are neg and pos"
# returns a python generator
text_synth_gen = pipe ( "csv" ,
data_files = [ "local/path/sentiment_classfication.csv" ],
task_desc = task_desc ,
text_column = "text" ,
label_column = "label" ,
text_column_alias = "Comment" ,
label_column_alias = "sentiment" ,
shot_count = 5 ,
class_names = [ "pos" , "neg" ])
#Loop through the generator to synthesize examples by class
for synthesized_examples in text_synth_gen :
print ( synthesized_examples ){
"text" : [ "The story was very dull and was a waste of my time. This was not a film I would ever watch. The acting was bad. I was bored. There were no surprises. They showed one dinosaur," ,
"I did not like this film. It was a slow and boring film, it didn't seem to have any plot, there was nothing to it. The only good part was the ending, I just felt that the film should have ended more abruptly." ],
"label" :[ "neg" , "neg" ]
}
{
"text" :[ "The Bell witch is one of the most interesting, yet disturbing films of recent years. It’s an odd and unique look at a very real, but very dark issue. With its mixture of horror, fantasy and fantasy adventure, this film is as much a horror film as a fantasy film. And it‘s worth your time. While the movie has its flaws, it is worth watching and if you are a fan of a good fantasy or horror story, you will not be disappointed." ],
"label" :[ "pos" ]
}
# and so on .....
เบื้องหลังการทำงาน Mutate ใช้ไลบรารี Hugging Face Datasets อันยอดเยี่ยมในการประมวลผลชุดข้อมูล จึงรองรับชุดข้อมูลจาก Hugging Face Datasets ได้ทันทีโดยไม่ต้องตั้งค่าเพิ่มเติม
from mutate import pipeline
pipe = pipeline ( "text-classification-synthesis" ,
model = "EleutherAI/gpt-neo-2.7B" ,
device = 1 )
task_desc = "Each item in the following contains customer service queries expressing the mentioned intent"
synthesizerGen = pipe ( "banking77" ,
task_desc = task_desc ,
text_column = "text" ,
label_column = "label" ,
# if the `text_column` doesn't have a meaningful value
text_column_alias = "Queries" ,
label_column_alias = "Intent" , # if the `label_column` doesn't have a meaningful value
shot_count = 5 ,
dataset_args = [ "en" ])
for exp in synthesizerGen :
print ( exp ){ "text" :[ "How can i know if my account has been activated? (This is the one that I am confused about)" ,
"Thanks! My card activated" ],
"label" :[ "activate_my_card" ,
"activate_my_card" ]
}
{
"text" : [ "How do i activate this new one? Is it possible?" ,
"what is the activation process for this card?" ],
"label" :[ "activate_my_card" ,
"activate_my_card" ]
}
# and so on .....
ข้อควรระวัง: การวนซ้ำผ่านชุดข้อมูลแบบไม่สิ้นสุดมีโอกาสสูงขึ้นที่จะสร้างตัวอย่างที่ซ้ำกัน
from mutate import pipeline

# Create the "text-classification-synthesis" pipeline with the chosen model.
pipe = pipeline(
    "text-classification-synthesis",
    model="EleutherAI/gpt-neo-2.7B",
    device=1,
)

task_desc = "Each item in the following contains movie reviews and corresponding sentiments. Possible sentimets are neg and pos"

# returns a python generator
text_synth_gen = pipe(
    "csv",
    data_files=["local/path/sentiment_classfication.csv"],
    task_desc=task_desc,
    text_column="text",
    label_column="label",
    text_column_alias="Comment",
    label_column_alias="sentiment",
    class_names=["pos", "neg"],
    # Flag to generate indefinite examples
    infinite_loop=True,
)

# Infinite loop
# Iterate over the generator bound above; the original snippet looped over
# `synthesizerGen`, a name this example never defines.
for exp in text_synth_gen:
    print(exp)

แนวคิดในการสร้างตัวอย่างจากรูปแบบภาษาขนาดใหญ่ได้รับแรงบันดาลใจจากงานด้านล่าง