
Hivemind is a PyTorch library for decentralized deep learning across the Internet. Its intended usage is training one large model on hundreds of computers from different universities, companies, and volunteers.
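For a rough sense of what this looks like in practice, here is a minimal, illustrative sketch built on hivemind's DHT and Optimizer classes; the model, run_id, and batch sizes below are placeholders, and the exact keyword arguments may vary between releases:

import torch
import torch.nn as nn
import hivemind

# Start a DHT node; in a real collaborative run you would pass
# initial_peers=[...] with the multiaddresses of already-running peers.
dht = hivemind.DHT(start=True)

model = nn.Linear(16, 2)  # placeholder model
base_opt = torch.optim.Adam(model.parameters(), lr=1e-3)

# Wrap the local optimizer so that updates are averaged with other peers.
# run_id groups peers of the same experiment; the batch sizes are illustrative only.
opt = hivemind.Optimizer(
    dht=dht,
    run_id="my_collaborative_run",
    optimizer=base_opt,
    batch_size_per_step=32,
    target_batch_size=4096,
)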
To learn more about the ideas behind this library, see the list of papers below.
This section lists projects that leverage hivemind for decentralized training. If you have successfully trained a model or created a downstream repository with the help of our library, feel free to submit a pull request that adds your project to this list.
Before installing, make sure that your environment has Python 3.8+ and PyTorch 1.9.0 or newer. They can be installed either natively or with Anaconda.
You can get the latest release with pip or build hivemind from source.
If your versions of Python and PyTorch match the requirements, you can install hivemind from pip:
pip install hivemind
Also, if you want to use blockwise 8-bit compression from bitsandbytes during data transfer, you can install it with pip install hivemind[bitsandbytes]. After that, you can use the BlockwiseQuantization class in hivemind.compression.
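As an illustration only, here is a hedged sketch of where this compression could be plugged in; it assumes that hivemind.Optimizer accepts a grad_compression argument and reuses placeholder objects like those in the sketch above:

import torch
import hivemind
from hivemind.compression import BlockwiseQuantization  # requires the bitsandbytes extra

dht = hivemind.DHT(start=True)
model = torch.nn.Linear(16, 2)  # placeholder model

opt = hivemind.Optimizer(
    dht=dht,
    run_id="compressed_run",  # placeholder experiment name
    optimizer=torch.optim.Adam(model.parameters(), lr=1e-3),
    batch_size_per_step=32,
    target_batch_size=4096,
    # Assumption: grad_compression controls how exchanged gradients are compressed.
    grad_compression=BlockwiseQuantization(),
)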
To install hivemind from source, simply run the following:
git clone https://github.com/learning-at-home/hivemind.git
cd hivemind
pip install .
If you would like to verify that your installation is working properly, you can install with pip install .[dev] instead. Then, you can run the tests with pytest tests/.
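Put together, the verification steps from the repository root are:

pip install .[dev]
pytest tests/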
By default, hivemind uses the precompiled binary of the go-libp2p-daemon library. If you face compatibility issues or want to build the binary yourself, you can recompile it by running pip install . --global-option="--buildgo". Before running the compilation, please ensure that your machine has a recent version of the Go toolchain (1.15 or 1.16 are supported).
If you have any questions about installing and using hivemind, feel free to ask them in our Discord chat or file an issue.
Hivemind is currently under active development, and we welcome all contributions. Everything, from bug fixes and documentation improvements to entirely new features, is appreciated.
If you would like to contribute to hivemind but don't know where to start, take a look at the open issues. Open a new issue or join our chat room if you want to discuss new functionality or report a possible bug. Bug fixes are always welcome, but new features are best discussed with the maintainers beforehand.
If you want to start contributing to the hivemind source code, please see the contributing guidelines first. To learn more about other ways to contribute, read our guide.
If you found hivemind or its underlying algorithms useful for your research, please cite the following source:
@misc{hivemind,
  title = {{H}ivemind: {D}ecentralized {D}eep {L}earning in {P}y{T}orch},
  author = {Max Ryabinin and Alexander Borzunov and Michael Diskin and Anton Gusev and Denis Mazur and Vsevolod Plokhotnyuk and Alexey Bukhtiyarov and Pavel Samygin and Anton Sinitsin and Artem Chumachenko},
  month = apr,
  year = 2020,
  address = {Online},
  url = {https://github.com/learning-at-home/hivemind}
}

Alternatively, you can cite the paper that inspired the creation of this library (a prototype implementation of hivemind is available at mryab/learning-at-home):
@inproceedings{ryabinin2020crowdsourced,
  title = {Towards Crowdsourced Training of Large Neural Networks using Decentralized Mixture-of-Experts},
  author = {Ryabinin, Max and Gusev, Anton},
  year = 2020,
  booktitle = {Advances in Neural Information Processing Systems},
  volume = 33,
  url = {https://proceedings.neurips.cc/paper/2020/file/25ddc0f8c9d3e22e03d3076f98d83cb2-Paper.pdf}
}

"Moshpit SGD: Communication-Efficient Decentralized Training on Heterogeneous Unreliable Devices"
@inproceedings{ryabinin2021moshpit,
  title = {Moshpit SGD: Communication-Efficient Decentralized Training on Heterogeneous Unreliable Devices},
  author = {Ryabinin, Max and Gorbunov, Eduard and Plokhotnyuk, Vsevolod and Pekhimenko, Gennady},
  year = 2021,
  booktitle = {Advances in Neural Information Processing Systems},
  volume = 34,
  url = {https://proceedings.neurips.cc/paper/2021/file/97275a23ca44226c9964043c8462be96-Paper.pdf}
}

"Distributed Deep Learning In Open Collaborations"
@inproceedings{diskin2021distributed,
  title = {Distributed Deep Learning In Open Collaborations},
  author = {Michael Diskin and Alexey Bukhtiyarov and Max Ryabinin and Lucile Saulnier and Quentin Lhoest and Anton Sinitsin and Dmitry Popov and Dmitriy Pyrkin and Maxim Kashirin and Alexander Borzunov and Albert Villanova del Moral and Denis Mazur and Ilia Kobelev and Yacine Jernite and Thomas Wolf and Gennady Pekhimenko},
  year = 2021,
  booktitle = {Advances in Neural Information Processing Systems},
  url = {https://openreview.net/forum?id=FYHktcK-7v}
}

"Secure Distributed Training at Scale"
@inproceedings{gorbunov2022secure,
  title = {Secure Distributed Training at Scale},
  author = {Gorbunov, Eduard and Borzunov, Alexander and Diskin, Michael and Ryabinin, Max},
  year = 2022,
  month = {17--23 Jul},
  booktitle = {Proceedings of the 39th International Conference on Machine Learning},
  series = {Proceedings of Machine Learning Research},
  volume = 162,
  url = {https://proceedings.mlr.press/v162/gorbunov22a.html}
}

"Training Transformers Together"
@misc{borzunov2022training,
  title = {Training Transformers Together},
  author = {Alexander Borzunov and Max Ryabinin and Tim Dettmers and Quentin Lhoest and Lucile Saulnier and Michael Diskin and Yacine Jernite and Thomas Wolf},
  year = 2022,
  eprint = {2207.03481},
  archiveprefix = {arXiv},
  primaryclass = {cs.LG}
}

"Petals: Collaborative Inference and Fine-tuning of Large Models"
@inproceedings{borzunov-etal-2023-petals,
  title = {Petals: Collaborative Inference and Fine-tuning of Large Models},
  author = {Borzunov, Alexander and Baranchuk, Dmitry and Dettmers, Tim and Ryabinin, Max and Belkada, Younes and Chumachenko, Artem and Samygin, Pavel and Raffel, Colin},
  year = 2023,
  month = jul,
  booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)},
  publisher = {Association for Computational Linguistics},
  address = {Toronto, Canada},
  pages = {558--568},
  doi = {10.18653/v1/2023.acl-demo.54},
  url = {https://aclanthology.org/2023.acl-demo.54},
  editor = {Bollegala, Danushka and Huang, Ruihong and Ritter, Alan}
}

"SWARM Parallelism: Training Large Models Can Be Surprisingly Communication-Efficient"
@inproceedings{ryabinin2023swarm,
  title = {{SWARM} Parallelism: Training Large Models Can Be Surprisingly Communication-Efficient},
  author = {Ryabinin, Max and Dettmers, Tim and Diskin, Michael and Borzunov, Alexander},
  year = 2023,
  month = {23--29 Jul},
  booktitle = {Proceedings of the 40th International Conference on Machine Learning},
  publisher = {PMLR},
  series = {Proceedings of Machine Learning Research},
  volume = 202,
  pages = {29416--29440},
  url = {https://proceedings.mlr.press/v202/ryabinin23a.html},
  editor = {Krause, Andreas and Brunskill, Emma and Cho, Kyunghyun and Engelhardt, Barbara and Sabato, Sivan and Scarlett, Jonathan},
  pdf = {https://proceedings.mlr.press/v202/ryabinin23a/ryabinin23a.pdf}
}

"Distributed Inference and Fine-tuning of Large Language Models Over The Internet"
@inproceedings{borzunov2023distributed,
  title = {Distributed Inference and Fine-tuning of Large Language Models Over The Internet},
  author = {Alexander Borzunov and Max Ryabinin and Artem Chumachenko and Dmitry Baranchuk and Tim Dettmers and Younes Belkada and Pavel Samygin and Colin Raffel},
  year = 2023,
  booktitle = {Thirty-seventh Conference on Neural Information Processing Systems},
  url = {https://openreview.net/forum?id=XmN7ZNbUAe}
}

We also maintain a list of related projects and acknowledgements.