مكتبة سريعة ومنخفضة استهلاك الموارد لمعالجة اللغة الطبيعية وتصحيح الأخطاء، مكتوبة بلغة Rust. تنفّذ nlprule نهجًا لمعالجة اللغة الطبيعية قائمًا على القواعد وجداول البحث، باستخدام موارد من LanguageTool.
تثبيت: pip install nlprule
الاستخدام:
from nlprule import Tokenizer, Rules

# Load the "en" tokenizer first; the rules are loaded on top of it.
tokenizer = Tokenizer.load("en")
rules = Rules.load("en", tokenizer)

# Correct a whole text in a single call.
rules.correct("He wants that you send him an email.")
# returns: 'He wants you to send him an email.'

rules.correct("I can due his homework.")
# returns: 'I can do his homework.'

# Inspect the individual suggestions instead of applying them.
for s in rules.suggest("She was not been here since Monday."):
    print(s.start, s.end, s.replacements, s.source, s.message)
# prints:
# 4 16 ['was not', 'has not been'] WAS_BEEN.1 Did you mean was not or has not been?

# Walk over every sentence of a text and print the analysis of each token.
for sentence in tokenizer.pipe("A brief example is shown."):
    for token in sentence:
        print(
            repr(token.text).ljust(10),
            repr(token.span).ljust(10),
            repr(token.tags).ljust(24),
            repr(token.lemmas).ljust(24),
            repr(token.chunks).ljust(24),
        )
# prints:
# 'A' (0, 1) ['DT'] ['A', 'a'] ['B-NP-singular']
# 'brief' (2, 7) ['JJ'] ['brief'] ['I-NP-singular']
# 'example' (8, 15) ['NN:UN'] ['example'] ['E-NP-singular']
# 'is' (16, 18) ['VBZ'] ['be', 'is'] ['B-VP']
# 'shown' (19, 24) ['VBN'] ['show', 'shown'] ['I-VP']
# '.' (24, 25) ['.', 'PCT', 'SENT_END'] ['.'] ['O']

الإعداد الموصى به:
Cargo.toml
[ dependencies ]
nlprule = " <version> "
[ build-dependencies ]
nlprule-build = " <version> " # must be the same as the nlprule version!

build.rs
/// Build script entry point.
///
/// Hands the `"en"` language code and Cargo's `OUT_DIR` to
/// `nlprule_build::BinaryBuilder`, then returns the outcome of `build()`
/// followed by `validate()`.
fn main() -> Result<(), nlprule_build::Error> {
    // Only re-run this script when build.rs itself changes.
    println!("cargo:rerun-if-changed=build.rs");

    let out_dir = std::env::var("OUT_DIR").expect("OUT_DIR is set when build.rs is running");
    let builder = nlprule_build::BinaryBuilder::new(&["en"], out_dir);

    builder.build()?.validate()
}

src/main.rs
use nlprule :: { Rules , Tokenizer , tokenizer_filename , rules_filename } ;
fn main ( ) {
let mut tokenizer_bytes : & ' static [ u8 ] = include_bytes ! ( concat! (
env! ( "OUT_DIR" ) ,
"/" ,
tokenizer_filename! ( "en" )
) ) ;
let mut rules_bytes : & ' static [ u8 ] = include_bytes ! ( concat! (
env! ( "OUT_DIR" ) ,
"/" ,
rules_filename! ( "en" )
) ) ;
let tokenizer = Tokenizer :: from_reader ( & mut tokenizer_bytes ) . expect ( "tokenizer binary is valid" ) ;
let rules = Rules :: from_reader ( & mut rules_bytes ) . expect ( "rules binary is valid" ) ;
assert_eq ! (
rules . correct ( "She was not been here since Monday." , & tokenizer ) ,
String :: from ( "She was not here since Monday." )
) ;
} يتم الحفاظ على نسخ nlprule و nlprule-build متزامنة.
| | عدد قواعد إزالة الغموض | عدد القواعد النحوية | إصدار LT | زمن nlprule | زمن LanguageTool |
|---|---|---|---|---|---|
| الإنجليزية | 843 (100 ٪) | 3725 (~ 85 ٪) | 5.2 | 1 | 1.7 - 2.0 |
| الألمانية | 486 (100 ٪) | 2970 (~ 90 ٪) | 5.2 | 1 | 2.4 - 2.8 |
| الإسبانية | دعم تجريبي. لم يُختبر بالكامل بعد. |
راجع قضية (issue) قياس الأداء للاطلاع على التفاصيل.
يرجى إرسال طلب سحب (Pull Request) لإضافة مشروعك!
يعود كل الفضل في الموارد المستخدمة في nlprule إلى LanguageTool، الذين بذلوا جهدًا هائلًا لإنشاء موارد عالية الجودة لتصحيح الأخطاء النحوية ولمعالجة اللغة الطبيعية عمومًا.
nlprule مرخّصة بموجب ترخيص MIT أو ترخيص Apache-2.0، حسب اختيارك.
ملفات nlprule الثنائية ( *.bin ) مشتقة من LanguageTool v5.2 ومرخّصة بموجب ترخيص LGPLv2.1. يرتبط nlprule بهذه الملفات الثنائية ربطًا ثابتًا وديناميكيًا. وبموجب الفقرة 6 (أ) من LGPLv2.1 لا يترتب على ذلك أي أثر على ترخيص nlprule نفسه.