chromadb_framework chromadb

chromadb_framework

شفرة المصدر الأخرى

1.0.0

تنزيل

إطار Chroma

ملخص

يعد Chroma Framework تطبيقًا قائمًا على Python مصمم لإدارة وبحث التضمينات النصية باستخدام نموذج محول الجملة. يمكّن الإطار المستخدمين من إنشاء مجموعات من تضمينات النص ، وإضافة مستندات جديدة ، والاستعلام عن أقرب النصوص على أساس استعلامات الإدخال.

سمات

⛩ إدارة التضمين ⛩ -> إنشاء وإدارة مجموعات من تضمينات النص.

إضافة المستند -> أضف مستندات جديدة إلى المجموعة مع البيانات الوصفية.

البحث عن النص -> ابحث عن أقرب النصوص إلى استعلام معين باستخدام نموذج التضمين.

معالجة المسار الديناميكي -> تحديد مسارات الملف تلقائيًا بالنسبة إلى دليل المشروع.

تثبيت

استنساخ المستودع :

git clone https://github.com/yourusername/chromadb_framework

انتقل إلى دليل المشروع :
```
 cd chromadb_framework
```
تثبيت أي تبعيات مطلوبة (إن أمكن) .
```
pip install -r requirements.txt
```

الاستخدام

تأكد من تثبيت Python 3.x.
شغّل التطبيق بتنفيذ الأمر التالي:
```
python main.py
```
اتبع المطالبات التي تظهر على الشاشة لإدارة التضمينات ونصوص البحث.

هيكل المشروع

project-root
├── config
│ ├── __init__.py
│ └── constants.py
│
├── src
│ ├── __init__.py
│ ├── client.py
│ ├── collection.py
│ └── data.py
│
├── utils
│ ├── __init__.py
│ └── helpers.py
│
├── .gitignore
├── .gitattributes
└── main.py

config/ : يحتوي على ملفات التكوين.
- __init__.py : يستورد الثوابت الخاصة بالنموذج والتكوين.
- constants.py : يحدد الثوابت المستخدمة في جميع أنحاء التطبيق.
src/ : يحتوي على ملفات الشيفرة المصدرية.
- __init__.py : يهيئ حزمة المصدر ويُعِدّ التسجيل.
- client.py : وظائف لإنشاء عميل قاعدة البيانات.
- collection.py : وظائف لإدارة المجموعات والبحث في النصوص.
- data.py : وظائف لاسترداد البيانات من المجلد المحدد.
utils/ : يحتوي على الوظائف المساعدة.
- __init__.py : يستورد الوظائف المساعدة.
- helpers.py : وظائف مساعدة لإعداد دالة التضمين وتحديد مسار مجلد النصوص.
.gitignore : يحدد الملفات والأدلة التي سيتجاهلها Git (على سبيل المثال ، البيئات الافتراضية ، ومخرجات البناء).
.gitattributes : يضمن نهايات أسطر متسقة عبر أنظمة التشغيل المختلفة في المستودع.
main.py : نقطة دخول التطبيق. يهيئ الإعدادات ، ويعالج عمليات التضمين ، ويدير عمليات البحث النصية.

أمثلة على الشيفرة

البرنامج الرئيسي

 from config . constants import MODEL_NAME , COLLECTION_NAME , INPUT_QUERY
from src . client import get_client
from src . collection import get_or_create_collection , add_collection , find_closest_texts
from src . data import get_data
from utils . helpers import set_def_llm , get_path

def main():
    """Run the demo pipeline: index the documents, then query them.

    Creates the client, resolves the documents folder, builds the embedding
    function and the collection, loads the documents, adds them to the
    collection and prints the sources closest to ``INPUT_QUERY``.
    """
    # Read the configuration constants up front, before any helper runs.
    model, collection_label, query = MODEL_NAME, COLLECTION_NAME, INPUT_QUERY

    client = get_client()
    texts_dir = get_path()
    embedder = set_def_llm(model)
    collection = get_or_create_collection(
        client, collection_label, embedding_function=embedder
    )

    # get_data yields three parallel lists that are passed on unchanged.
    documents, metadatas, ids = get_data(texts_dir)
    add_collection(collection, documents, metadatas, ids)

    print("Closest text(s):", find_closest_texts(collection, query))


if __name__ == "__main__":
    main()

وظائف الأداة المساعدة

helpers.py : وظائف مساعدة لإعداد دالة التضمين وتحديد مسار المجلد.

 from os . path import abspath , dirname , join
from chromadb . utils import embedding_functions

def set_def_llm(model_name=None):
    """Build the embedding function for the given model name.

    Args:
        model_name: Name passed to ``SentenceTransformerEmbeddingFunction``.
            When falsy, ``DefaultEmbeddingFunction`` is used instead.

    Returns:
        The constructed embedding function, or ``None`` if construction
        fails. Failures are printed rather than raised.
    """
    try:
        if model_name:
            return embedding_functions.SentenceTransformerEmbeddingFunction(
                model_name=model_name
            )
        return embedding_functions.DefaultEmbeddingFunction()
    except Exception as e:
        # Best-effort by design: report the actual cause (previously dropped
        # from the message) and signal the failure with None.
        print(f"An error occurred while setting the sentence transformer: {e}")
        return None

def get_path(folder_name="texts"):
    """Return the path of ``folder_name`` located beside this file's package.

    The path is built from the parent of the directory containing this
    module, joined with ``folder_name``.

    Args:
        folder_name: Name of the folder to resolve.

    Returns:
        The joined path as a string, or ``None`` if it cannot be built.
        Failures are printed rather than raised.
    """
    try:
        module_dir = dirname(abspath(__file__))
        # One level up from the module's directory is the project directory.
        project_dir = dirname(module_dir)
        return join(project_dir, folder_name)
    except Exception as e:
        # Report the actual cause (previously dropped from the message).
        print(f"An error occurred while getting the folder path: {e}")
        return None

إنشاء العميل

client.py : وظائف لإنشاء عميل قاعدة البيانات.

 from chromadb import PersistentClient

def get_client(path="vector_db"):
    """Create a ``PersistentClient`` for the database at ``path``.

    Args:
        path: Value passed to ``PersistentClient`` as its ``path`` argument.

    Returns:
        The client, or ``None`` if it could not be created. Failures are
        printed rather than raised.
    """
    try:
        return PersistentClient(path=path)
    except FileNotFoundError:
        # Name the offending path; the message used to stop at the colon.
        print(f"Database directory not found: {path}")
    except Exception as e:
        print(f"An error occurred while creating the client: { e } ")
    # Both failure branches fall through to an explicit None.
    return None

إدارة المجموعات

collection.py : وظائف لإدارة المجموعات والبحث في النصوص.

 def get_or_create_collection(client, name, embedding_function):
     """Fetch the named collection from ``client``, creating it if needed.

     Delegates to ``client.get_or_create_collection``. Any exception is
     printed and ``None`` is returned instead of the collection.
     """
     try:
         handle = client.get_or_create_collection(
             name=name,
             embedding_function=embedding_function,
         )
     except Exception as e:
         print(f"An error occurred while creating the collection: { e } ")
         return None
     return handle

def add_collection(collection, documents, metadatas, ids):
    """Add documents with their metadata and ids to ``collection``.

    Forwards the three arguments as keywords to ``collection.add``. Any
    exception is printed rather than raised; nothing is returned.
    """
    payload = {"documents": documents, "metadatas": metadatas, "ids": ids}
    try:
        collection.add(**payload)
    except Exception as e:
        print(f"An error occurred while adding to the collection: { e } ")

def find_closest_texts(collection, input_query, n_results=2):
    """Return the ``source`` metadata of the entries closest to a query.

    Queries ``collection`` with ``input_query`` as the only query text,
    requesting metadata only, and collects the ``"source"`` value of each
    metadata entry returned for that query.

    Returns:
        A list of ``source`` values, or ``None`` if the query or the result
        handling raises. Failures are printed rather than raised.
    """
    try:
        response = collection.query(
            query_texts=[input_query],
            include=["metadatas"],
            n_results=n_results,
        )
        # Index 0 selects the metadata list for the single query text sent.
        return [entry["source"] for entry in response["metadatas"][0]]
    except Exception as e:
        print(f"An error occurred while finding the closest text: { e } ")

إعداد البيانات

data.py : وظائف لاسترداد البيانات من المجلد المحدد.

 from os import listdir
from os . path import join

def get_data(folder_path):
    """Load every ``.txt`` file in ``folder_path`` as a document.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        A tuple ``(documents, metadatas, ids)`` of three parallel lists:
        file contents, ``{"source": file_name}`` dicts, and ids of the form
        ``"id1"``, ``"id2"``, ... numbered in sorted file-name order.
        On any error the problem is printed and three empty lists are
        returned.
    """
    try:
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # id-to-file assignment the same from one run to the next.
        text_files = sorted(
            name for name in listdir(folder_path) if name.endswith(".txt")
        )

        documents = []
        metadatas = []
        ids = []
        for index, file_name in enumerate(text_files, start=1):
            # Explicit encoding so the result does not depend on the
            # platform's default locale.
            with open(join(folder_path, file_name), encoding="utf-8") as file:
                documents.append(file.read())
            metadatas.append({"source": file_name})
            ids.append(f"id{index}")
        return documents, metadatas, ids
    except Exception as e:
        print(f"An error occurred while creating the data: { e } ")
        return [], [], []