Chroma Framework adalah aplikasi berbasis Python yang dirancang untuk mengelola dan mencari embeddings teks menggunakan model sentence transformer. Kerangka kerja ini memungkinkan pengguna untuk membuat koleksi embeddings teks, menambahkan dokumen baru, dan mencari teks terdekat berdasarkan kueri input.
⛩️ Manajemen Embedding ⛩️ -> Buat dan kelola koleksi embeddings teks.
Penambahan Dokumen -> Tambahkan dokumen baru ke koleksi beserta metadatanya.
Pencarian Teks -> Temukan teks yang paling dekat dengan kueri yang diberikan menggunakan model embedding.
Penanganan Jalur Dinamis -> Secara otomatis menentukan jalur file relatif terhadap direktori proyek.
Klon Repositori:
git clone https://github.com/yourusername/chromadb_framework
Arahkan ke Direktori Proyek:
cd chromadb_framework
Instal dependensi yang diperlukan (jika berlaku):
pip install -r requirements.txt
Pastikan Anda menginstal Python 3.x.
Jalankan aplikasi dengan mengeksekusi:
python main.py
Ikuti petunjuk di layar untuk mengelola embeddings dan mencari teks.
project-root
├── config
│   ├── __init__.py
│   └── constants.py
│
├── src
│   ├── __init__.py
│   ├── client.py
│   ├── collection.py
│   └── data.py
│
├── utils
│   ├── __init__.py
│   └── helpers.py
│
├── .gitignore
├── .gitattributes
└── main.py
config/ : Berisi file konfigurasi.
src/ : Berisi file kode sumber.
utils/ : Berisi fungsi utilitas.
.gitignore : Menentukan file dan direktori yang harus diabaikan oleh git (misalnya, lingkungan virtual, artefak build).
.gitattributes : Memastikan akhir baris (line endings) yang konsisten di berbagai sistem operasi di repositori.
main.py : Titik masuk aplikasi. Menginisialisasi pengaturan, menangani operasi embedding, dan mengelola pencarian teks.
from config . constants import MODEL_NAME , COLLECTION_NAME , INPUT_QUERY
from src . client import get_client
from src . collection import get_or_create_collection , add_collection , find_closest_texts
from src . data import get_data
from utils . helpers import set_def_llm , get_path
def main():
    """Run the demo pipeline and print the texts closest to ``INPUT_QUERY``.

    Steps, in order: create the database client, resolve the text folder,
    build the embedding function for ``MODEL_NAME``, get or create the
    ``COLLECTION_NAME`` collection, load the documents from the folder, add
    them to the collection, then query the collection and print the result.
    """
    client = get_client()
    texts_dir = get_path()
    embedder = set_def_llm(MODEL_NAME)

    collection = get_or_create_collection(
        client, COLLECTION_NAME, embedding_function=embedder
    )

    documents, metadatas, ids = get_data(texts_dir)
    add_collection(collection, documents, metadatas, ids)

    print("Closest text(s):", find_closest_texts(collection, INPUT_QUERY))


if __name__ == "__main__":
    main()

# helpers.py: utility functions for setting up the model and getting paths.
from os . path import abspath , dirname , join
from chromadb . utils import embedding_functions
def set_def_llm(model_name=None):
    """Build the embedding function to be used by the collection.

    Args:
        model_name: Name passed to ``SentenceTransformerEmbeddingFunction``.
            When falsy (``None`` or ``""``), ``DefaultEmbeddingFunction`` is
            used instead.

    Returns:
        The embedding function, or ``None`` if creating it raised (the error
        is printed rather than propagated).
    """
    try:
        if model_name:
            return embedding_functions.SentenceTransformerEmbeddingFunction(
                model_name=model_name
            )
        return embedding_functions.DefaultEmbeddingFunction()
    except Exception as e:
        # Best-effort by design: report the failure together with its cause
        # (the cause was previously dropped) and hand back None.
        print(f"An error occurred while setting the sentence transformer: {e}")
        return None
def get_path(folder_name="texts"):
    """Return the absolute path of *folder_name* under the project directory.

    The project directory is computed as the parent of the directory that
    contains this module.

    Args:
        folder_name: Name of the folder to append to the project directory.

    Returns:
        The joined path as a string, or ``None`` if resolving it raised (the
        error is printed rather than propagated).
    """
    try:
        module_dir = dirname(abspath(__file__))
        return join(dirname(module_dir), folder_name)
    except Exception as e:
        # Include the cause in the message; it was previously dropped.
        print(f"An error occurred while getting the folder path: {e}")
        return None

# client.py: function for creating the database client.
from chromadb import PersistentClient
def get_client(path="vector_db"):
    """Create a ``PersistentClient`` for the database stored at *path*.

    Args:
        path: Location handed to ``PersistentClient``.

    Returns:
        The client, or ``None`` if creating it raised (the error is printed
        rather than propagated).
    """
    try:
        return PersistentClient(path=path)
    except FileNotFoundError:
        # Name the offending path; the message used to end in a bare colon.
        print(f"Database directory not found: {path}")
    except Exception as e:
        print(f"An error occurred while creating the client: { e } ")
    return None

# collection.py: functions for managing collections and searching texts.
def get_or_create_collection(client, name, embedding_function):
    """Delegate to ``client.get_or_create_collection`` and return its result.

    Args:
        client: Object exposing ``get_or_create_collection``.
        name: Collection name forwarded as ``name``.
        embedding_function: Forwarded as ``embedding_function``.

    Returns:
        Whatever the client call returns, or ``None`` if it raised (the error
        is printed rather than propagated).
    """
    try:
        collection = client.get_or_create_collection(
            name=name,
            embedding_function=embedding_function,
        )
    except Exception as e:
        print(f"An error occurred while creating the collection: { e } ")
        return None
    return collection
def add_collection(collection, documents, metadatas, ids):
    """Forward documents, metadatas and ids to ``collection.add``.

    The three arguments are passed as the keyword arguments ``documents``,
    ``metadatas`` and ``ids``. Any exception raised by the call is printed
    rather than propagated. Always returns ``None``.
    """
    payload = {"documents": documents, "metadatas": metadatas, "ids": ids}
    try:
        collection.add(**payload)
    except Exception as e:
        print(f"An error occurred while adding to the collection: { e } ")
def find_closest_texts(collection, input_query, n_results=2):
    """Query *collection* and return the ``"source"`` of each returned metadata.

    Args:
        collection: Object exposing ``query``.
        input_query: Text sent as the single entry of ``query_texts``.
        n_results: Forwarded to ``collection.query`` as ``n_results``.

    Returns:
        A list with the ``"source"`` value of every metadata entry in
        ``results["metadatas"][0]``, or ``None`` if anything raised (the
        error is printed rather than propagated).
    """
    try:
        results = collection.query(
            query_texts=[input_query],
            include=["metadatas"],
            n_results=n_results,
        )
        # Only one query text is sent, so only the first result list is read.
        first_query_metadatas = results["metadatas"][0]
        return [metadata["source"] for metadata in first_query_metadatas]
    except Exception as e:
        print(f"An error occurred while finding the closest text: { e } ")

# data.py: function for retrieving data from the given folder.
from os import listdir
from os . path import join
def get_data(folder_path):
    """Load every ``.txt`` file in *folder_path* as a document.

    Files are processed in sorted name order so that the generated ids
    (``"id1"``, ``"id2"``, ...) map to the same files on every run;
    ``listdir`` alone gives no ordering guarantee. Files are read as UTF-8 so
    the result does not depend on the platform's default encoding.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        A ``(documents, metadatas, ids)`` tuple of equally long lists, where
        each metadata is ``{"source": <file name>}``. If anything raises, the
        error is printed and three empty lists are returned.
    """
    try:
        documents = []
        metadatas = []
        ids = []
        txt_names = sorted(
            name for name in listdir(folder_path) if name.endswith(".txt")
        )
        for index, file_name in enumerate(txt_names, start=1):
            with open(join(folder_path, file_name), encoding="utf-8") as file:
                documents.append(file.read())
            metadatas.append({"source": file_name})
            ids.append(f"id{index}")
        return documents, metadatas, ids
    except Exception as e:
        print(f"An error occurred while creating the data: { e } ")
        return [], [], []

# This project is licensed under the GNU General Public License v3.0 (GPL-3.0) - see the license file for details.
Beri tahu saya jika ada detail spesifik yang ingin Anda sesuaikan atau bagian tambahan yang ingin Anda sertakan!