From fd3f355a0babdb802a8f60a0d28e02579b8a9f26 Mon Sep 17 00:00:00 2001 From: Yan Xia <59006636+sd983527@users.noreply.github.com> Date: Tue, 15 Apr 2025 14:53:56 +0800 Subject: [PATCH] update readme and setup script to support official BitNet b1.58 model (#171) * update readme and setup file for new model. * update model file name --------- Co-authored-by: Yan Xia --- README.md | 48 ++++++++++++++++++++++++++------ assets/header_model_release.png | Bin 0 -> 14848 bytes setup_env.py | 9 +++++- 3 files changed, 48 insertions(+), 9 deletions(-) create mode 100644 assets/header_model_release.png diff --git a/README.md b/README.md index 013daa0..df6f718 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT) ![version](https://img.shields.io/badge/version-1.0-blue) +BitNet Model on Hugging Face + bitnet.cpp is the official inference framework for 1-bit LLMs (e.g., BitNet b1.58). It offers a suite of optimized kernels, that support **fast** and **lossless** inference of 1.58-bit models on CPU (with NPU and GPU support coming next). The first release of bitnet.cpp is to support inference on CPUs. bitnet.cpp achieves speedups of **1.37x** to **5.07x** on ARM CPUs, with larger models experiencing greater performance gains. Additionally, it reduces energy consumption by **55.4%** to **70.0%**, further boosting overall efficiency. On x86 CPUs, speedups range from **2.37x** to **6.17x** with energy reductions between **71.9%** to **82.2%**. Furthermore, bitnet.cpp can run a 100B BitNet b1.58 model on a single CPU, achieving speeds comparable to human reading (5-7 tokens per second), significantly enhancing the potential for running LLMs on local devices. Please refer to the [technical report](https://arxiv.org/abs/2410.16144) for more details. 
@@ -18,7 +20,8 @@ A demo of bitnet.cpp running a BitNet b1.58 3B model on Apple M2: https://github.com/user-attachments/assets/7f46b736-edec-4828-b809-4be780a3e5b1 ## What's New: -- 02/18/2025 [Bitnet.cpp: Efficient Edge Inference for Ternary LLMs](https://arxiv.org/abs/2502.11880) ![NEW](https://img.shields.io/badge/NEW-red) +- 04/14/2025 [BitNet Official 2B Parameter Model on Hugging Face](https://huggingface.co/microsoft/BitNet-b1.58-2B-4T) ![NEW](https://img.shields.io/badge/NEW-red) +- 02/18/2025 [Bitnet.cpp: Efficient Edge Inference for Ternary LLMs](https://arxiv.org/abs/2502.11880) - 11/08/2024 [BitNet a4.8: 4-bit Activations for 1-bit LLMs](https://arxiv.org/abs/2411.04965) - 10/21/2024 [1-bit AI Infra: Part 1.1, Fast and Lossless BitNet b1.58 Inference on CPUs](https://arxiv.org/abs/2410.16144) - 10/17/2024 bitnet.cpp 1.0 released. @@ -29,9 +32,38 @@ https://github.com/user-attachments/assets/7f46b736-edec-4828-b809-4be780a3e5b1 ## Acknowledgements This project is based on the [llama.cpp](https://github.com/ggerganov/llama.cpp) framework. We would like to thank all the authors for their contributions to the open-source community. Also, bitnet.cpp's kernels are built on top of the Lookup Table methodologies pioneered in [T-MAC](https://github.com/microsoft/T-MAC/). For inference of general low-bit LLMs beyond ternary models, we recommend using T-MAC. +## Official Models + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ModelParametersCPUKernel
I2_STL1TL2
BitNet-b1.58-2B-4T2.4Bx86
ARM
## Supported Models -❗️**We use existing 1-bit LLMs available on [Hugging Face](https://huggingface.co/) to demonstrate the inference capabilities of bitnet.cpp. These models are neither trained nor released by Microsoft. We hope the release of bitnet.cpp will inspire the development of 1-bit LLMs in large-scale settings in terms of model size and training tokens.** +❗️**We use existing 1-bit LLMs available on [Hugging Face](https://huggingface.co/) to demonstrate the inference capabilities of bitnet.cpp. We hope the release of bitnet.cpp will inspire the development of 1-bit LLMs in large-scale settings in terms of model size and training tokens.** @@ -143,12 +175,13 @@ pip install -r requirements.txt ``` 3. Build the project ```bash -# Download the model from Hugging Face, convert it to quantized gguf format, and build the project +# Manually download the model and run with local path +huggingface-cli download microsoft/BitNet-b1.58-2B-4T --local-dir models/BitNet-b1.58-2B-4T +python setup_env.py -md models/BitNet-b1.58-2B-4T -q i2_s + +# Or you can download a model from Hugging Face, convert it to quantized gguf format, and build the project python setup_env.py --hf-repo tiiuae/Falcon3-7B-Instruct-1.58bit -q i2_s -# Or you can manually download the model and run with local path -huggingface-cli download tiiuae/Falcon3-7B-Instruct-1.58bit --local-dir models/Falcon3-7B-Instruct-1.58bit -python setup_env.py -md models/Falcon3-7B-Instruct-1.58bit -q i2_s ```
 usage: setup_env.py [-h] [--hf-repo {1bitLLM/bitnet_b1_58-large,1bitLLM/bitnet_b1_58-3B,HF1BitLLM/Llama3-8B-1.58-100B-tokens,tiiuae/Falcon3-1B-Instruct-1.58bit,tiiuae/Falcon3-3B-Instruct-1.58bit,tiiuae/Falcon3-7B-Instruct-1.58bit,tiiuae/Falcon3-10B-Instruct-1.58bit}] [--model-dir MODEL_DIR] [--log-dir LOG_DIR] [--quant-type {i2_s,tl1}] [--quant-embd]
@@ -173,7 +206,7 @@ optional arguments:
 ### Basic usage
 ```bash
 # Run inference with the quantized model
-python run_inference.py -m models/Falcon3-7B-Instruct-1.58bit/ggml-model-i2_s.gguf -p "You are a helpful assistant" -cnv
+python run_inference.py -m models/BitNet-b1.58-2B-4T/ggml-model-i2_s.gguf -p "You are a helpful assistant" -cnv
 ```
 
 usage: run_inference.py [-h] [-m MODEL] [-n N_PREDICT] -p PROMPT [-t THREADS] [-c CTX_SIZE] [-temp TEMPERATURE] [-cnv]
@@ -246,4 +279,3 @@ python utils/generate-dummy-bitnet-model.py models/bitnet_b1_58-large --outfile
 python utils/e2e_benchmark.py -m models/dummy-bitnet-125m.tl1.gguf -p 512 -n 128
 ```
 
-
diff --git a/assets/header_model_release.png b/assets/header_model_release.png
new file mode 100644
index 0000000000000000000000000000000000000000..0c955c930df9ac38d054ab2a363b0d255e5b1694
GIT binary patch
literal 14848
zcmd^mXIE2S^d=n(ML|RbM3IhwO7Ef~5PFvm0@4Z6dl3-<>77ud1Pn+O5(y+7mBUbu0Af`WokQ{#yt1qC&j
z{D1K|I`VHy!N^O0{`eZIKcc7|`*_tr4?rJ!Ky`tw8C
z4gBCpK_Qo=`9#$?$OeZB4r7}@sch-pvY)tk)oQ!(v!d`9Mzypk5kS?48vRF};F?Dt
zwAkKR78|m`;8$Xry^MpJOlcVn*=cmDK0mpnEiAn0Ui(U
zOQX@=vpf6MPphs>g#|C
z7TEgE9^y6rhOskWDn#tx5B@IN(s3MOp)_a1KpAfgGz0u*Fqu}Gz4|2XyaCxPi
zzo>i|u-C6b!r7)ftcQ{;TCW~XA);n2!rum+`rFMsu=_$4{`T1?y3fm3-w{ABp8CS+Dx5f17$c|
zh*?92*jZY0g+GQrR|{J>T1@GDeZF&N`C+AQL)SmixuB1{2tbhL&y)lxnt;q@+2kVyr@qByHY6bvOXz{qRpvdwo5{1##o&*T(3t
zso1=F2qxTru5@_I8`r!;$`VQm>QhT((W;niQL+e9oZ!V7*@J1VGWa!fgk!yiZ!N^KM-3M4
z-QGcAB!)CdAF1iq9{*F(jr@pee0C-UhYXcEH{-kOcTV4~CtXRpzR2}~Ek8pq*?MT@
z1*9Tu#;LP2dep@b5%IrabFz*=2&rjhjo){O%D-_GS{xoFF_^k(Q1IhjZy&hIt7Lti
z>scesG*IOh3*}WYnwt^Vf|l9rc3)tqDxs5eh<^seAbTlN|ANMpc=t^a(MfrEf2!Wz
zDD;`>4LSI_@PYV+zIXvDc5B|~qpz{s=pLJJfBQO>f4f%m^&Eh50gjDQA(;=+&cf&e
z<}N>jiOt?KEWymOO+$6yh!MscG>Ru5+jRaD?EA5-nYHG3!UN6@5XY&|>c{kUn^qXQ
zxPQlpep&CaI-m#nFrzSlszN(Wri^z`aqMQQ7upM5@eg9xY!x5?2Bd#6L^mI9*1?8~
zjC}8`2Z68eSKCrDD_sFiUBz`MZ2e9g-O6z2yGlN~<@b3jFPCpGMi&#moof9vfbV*W
z-(R6nK4FkO^&@X+umt>IeiXl=d_tklMZ3|QBN-s+!&~o`b%up5*S1EDl!4PKm;0iB
zJY6VDDL42aW2D+_phbZ7
z){lLD*;#M@@L)5YL{<)(0+cFeA4p#}a{ow}YeVk8eNtZv(1*LSZgnv~>`p$uG*VmW
z$0p&*a?iIySu(O2D2|;gq28MF?JSA=!G!*NS@ziFeaUTU^imXclsY%!~?{`)xl$Z@~}-}CnM@C?^nw9ap1b)E@pZ-P9e&fera!;Y}z8)
ziS4UJJ9e*kki#V*$=8(fPsrH&P~%lUJX{XYPsr{h%=t!F7Ei`qNw)7=(pec)P)DSp
z>Q&(|Az`xSPP6yY%BJY%E`G*llR?J*xO-9r+4)949<6_pz?ouuQ#gi~isY+Og1EX)vrD7`(45>=s7SDow)Gofm*9Wa*ku&9jVhloN
z2D9>HRaO+jL3|lOUH}l$1rGIf##rtag_h^LcHEG?bhyYJ4VMq5{(VR=vReimL4=Ep~euYIS>D!6}-ww&quc0_{rL?$3=0)n(l6e@lT=}9Ka!jud&C&L1YuT)MGdx&*g+NFY02V
zuF$zCowA3GCcA`-$XV#)#@dK+MbEfOQlzSro11&
zAECH4ZZvAASBss#yf!O!e88kYK_UuxcQJOJ7oae+qU7{TWjNW<(uB{qFQZqBcD=@3
zmNYP}k_HN!=jH-9ybn+{*xjboCp;_#$z0XU@%{_ZA-X;{IHKKaT~A|ZyN~xCI)RHU
zpS>7Jj}k4EArkE%(NxDI1JaR6nEeGWj#Npy9)}KRxKimKckp?sZQV+
zSy@?A$50HH(cg1Q_}DdB)7j0f9w$e~l<8$J4Wzpn{DfOrk3i2b?2Frz$-Td-Z)qJ}X+S=cwUgn9asMWO%Phf{P{id4Q&amB&cVB~V
zY5o@Zchus4m4U6LHvKs48)Mn<=+#dviC~{~wB4+B5kr4r^(sF<%P&jA
z(y)>IKP$+wRfQOi*6YE%I*(SVPw;gqX-dEDd1JtxjZQ2CuE2tVTTkDm{u&tQv;=-X
zTOo{r8+a&!Rtu-?T_7DuNCpv6L3enJo&Loo|D=-p3CzofRl<&9>-2zy!Klr3jMr|<
zXMg6QzrRPB2pH_+AnW};wEtO-6$@k4Xz-Xyuun*3u_or+oNrGod+%{GzpPfAAN{Gn
z|32)CiHT1~H6)D`_vrXqX@{~u@Pf{x+pZ74X@bGW*S0Hk#(YQJcN~=j?!!=~va1ZG
zk8(DY&KcbNIWQR*d8m3UXbsa9%LXDC!}jz1PIi99b;8h8?vol(Rgvy_nhN;4UskK@
zsJd@noC_>geq-FOw0y<9xF=(=SVP=`HyShDu!H-6ltKSY8m8y|iT)#nljaSJvIyoz
z4rFu2L+LB&XBu7ht}~55+w)r|dl@pH3l*rX;TmRAN1cQpbJbFCjVHSEuz5GcWsp4E
z=N*8OOmL3X^A2;yM*PZPmduu_u`$IbXKc&-8>YRpfcqfPL3!;HDeJy$%uyb+z4+EL
zSW4Tex#H>2eTCoWeBE1E1@dD?*Vhkg8Q0hQMPqVuv}!Vqz40kKJ`3!Ktq75k>2vQm
zoFj9G$CujApo`Ph1ya(z{=c|M^$4}P8rnC^Q@*FLRE}R(7i@Ml
z@;KYa0SbKpe)C;%BF#=(8?t1>Hy%Z~-IwPgEM{8um>zt*Z?Zn9u(+DhNAQ1{UK86?
z!5@mv&&oO$x2SBcVdI~5a~{6RrX$++qU^hQVTU%xMmk&>Q&jAVqy}VV|5%k1LTNim
znP_7@6>LOA7Q6MjeX_FY6aw<|mn`0kCuV4m)pK=2eM~JmR5}#+Mqhmfx!SG?nL{x~
zE9B)r>#a)i40+O`EncVFXdNl&;PdHtv);b5Fp!Fs|0hSQ2Xtv}vBMfToG1r7joSq7<_xtFr>uX*k3E
zJZv67y|QFz15tcf(={~cP1mS*wx&@=QHJdW$PJ(+3sWB`amOcIV`c5Mv|<(nT$g*k
zWv3*tUYdL|j=B!$~%{HSBk-e%(f0Tdc
z{cepdY`_U_2bbqC$_x2jk*SS6JBg;}CU~{Wd>A!6|E@bZ%QR*;2_pZ-Y|k3@q__01;f)LtkRR
z!fj^$daRcIBOazHyTYts@HYCJfO{tezN+pxAmCFL$q&fPid243ljy0Wjd(e|lHgPM4N0SthRZ?QYH|@~A1S0gQo~lmq#QMVPn75|>S9*;
zPl*kS$q&#wJKt8X8w>X_g;hAu)~VJN+7R4h$}C~upfexM8&QSf9jiq?^8vQS1a}w+
z?E{xPIZ6~ST~(;VzIH_sd=@ocD7O;2(G2FpiueR*TY;
z<%RvROJPrc6){`%S_ozyw)9ue$@igVeJ;H|TbC5@L{*Jn6c_X#)dVyHxkpF8<$W8;;m@p5A|HKKRmh1kq|d6VKs
z*t*-y%|T}hFV#MC`5+uU!a?YA#UR?#WW{;#9k_jss>D~8|12!rzG
zHdQfW0fxhkj-$i3u)kp)x));mPo`6hes~n8el}8zRgMjkl3Hn8)BRPyF(s+FJ+J#|
zia;(W@Pf&gF0*|}I2#L>ZG^H8CqaJ4iPlgGQu5eMNsD$IGM
zsNpBD3i44xYeu-ay}~;;DV}>!1HydPeW9?v*s_|I+?a>K6`c#8)!h-%l8!(Vk%q32
zBlzlfC`qRWh)U0U5gh!yk4xVx!pP0gkVkaJCgA#TWs0(-96k5liA1lq%d$jo*Al4v
zl#6@HkTTY~`D=p@xapp(mleSh;yU)b6{y+=t8Cq1IXyuF%j&ul6WXfj*cr0A-X#^IF_I73P*y$JMo(OzNyQVj0oQlP{?fS1?)OY*pSmddJX9YWDU3*v+3N_becM_
zFEVN(EzyGVjTU{n_33%6N8PO-$w?}aFw@2{tIzk8!eg-pZn_pQT?kx0e&ysq?@ZLS
zJ9&E|vNh0by)cjOh+cnvC~`9L_GDcStN|dj
zH(>^_4lFbT$zUDSJkhn|4c$NVBA0$n%hUjRnh4w4WQF58bI@^8?$)4O6INOW$|zVj
z+jOknxwI7j85f@p!a;0P%RlK1*;`7Fh439`PTC)HSrk=dXU%j+ce92dwmo}*!7;;?
zc?nmRAdT$E4sDt6hzLlNU!c)dK_yK*0D^<;~BPE}w6V$H|I`B&yiX
ztQeVQPfNQzXp-Hmb^FNdYuM<~?Z+=EncHVxDs<&Rv3iyhl20KL*~a3eP;6}cEk^;7
z*&op?8pAzC^Z-VA>}pBpO%FQvR0wxT4M1)9kArjp0ePNIW6-dk3ggjA9IvJMj)Qz%
zD?XM8ZH`bW*IF9*RHThvfz`db+X^Y*7y2Hm5F2r*+R;E<EH1(5?!5r5Jti{L6pcp-~UNjCgc)oaUxzQNMdlMp2)
zvfFXpFFjOI0HZCS6y5;^1&LQAw&R{i_`4AgH_an24Zot
zO!?`j72VYz*D0Hn=QMkE@}1o)^UfiOBR*mB=zyUDq19VR#$1eI)m`KuS#1%Q
zl|4IQ?Mzn3Iuldl7CV$a?~>c$$um2uzR1+*svZyhm=3r
zU8sGcl85_{Qdw!7HJ}YVa9kS;5C=ur+5_Ydg4^4y_8X?8s(u)@TdJ_P<|zlitV~E)
zQ})}`D>z+i?uQC@FE4L1kLw(Sm+;O?)@zXU^ksodI!g+gO8)rTaawi_YE2HArxKx)}e>3o)q7lT*HmNW`Y;5Xw+LE#+U0Oe_YV+?vCgi|J
z5bnvM|LyzMob5mSb9Jkr=-gL6mY2#>I2@I0N`|@%A7#UMBV>4cTs(s{vulVnnoykM
zb8L*UUJs1}#1SdJ)zYA^-2F0jX-<(Jp46U=#B$h#bCT7xUxe#`WK5%{yQSP-=(Fd-
zF~>ohjv=4$&9((g{&({3BEt*|wLSV4x{Z^ykv{`;@8d2aPMNxVq@}*6tm;g#$Ru*cHPK*c9``CD_^0AJcvRXi^Jv$f~=Na*NHx+aD
zYZ~XUK{_hr$)>6`Hv_B#@(km+i-;VfUZ~8FU7hgW=O+Pq9THwSVu1FY1m$eQy7r3k
zWQAT?)mPc%q=N)j8)hWn>)x6icQgGZ5Mj;DvN!Fpjh$*JGYmwr)m$KADgih|Br!&>#HHS*Geg_A;MZUm@5V7%M6(l}dZ
z>=5%G$Zn!WsThA<=|aTit^CFB(00$|FNEP)KV9udUebP&VV?;)aF;X!UjR9Un5Ir;
z4PSG}ud$?8JZ&lxNl1u75Y=vIQ$;0lv~%EO5A8H5ww5x;*_IeO$`c&>Y3LaL!mK;k
zo`;+;TSl+D*-Z$Ce^{H)x?$^sY%{;{`I@}3e6|XPcL5dX(7ip@p;SL*vy(Gq*%84w)o6=D1NzFF)Fjg%FJRN
zU6~8PYcBQ3hX{#g{dhembY-+V0T$677^CLp?>}nm%9CU-yV9SgEbTln8dn}SHk|5d
z{`mGnF^RAlBX7NqJ>^({RA>A!_A{!iKpEaPJ@E;4`k{d?^@R)w^!d+?S
zidNdEP(GUu^Bt*VuHPLD5gVSnEAUM%@fB>r)atHG%xW>S;{LUIkL%t;N}Z5ODy7hM
zhgF#OGx_qs%3c6ITLrb!Psy)4?z@`^xl7JA-Os#hbTPX<Bwm(Mjk3}^x&gEW{<=5M18oqNGpDiUKyrc@e
z{V6LkLwD4-jmtl?h}8+yF$g5#S}Kbk*KAK(hUglnhN}eC7dbe~%VcI9(KArfAE#vK
zLhgd{<~LhBHmzEQ@?{NdI2o_q67OPC(7tI0>V3y<)M;kn#>8^kb_y-JL&xwI5jd8?
zetKi<#&Pvn0nESJSRCa$bZGW7L|DjR9h^fD`Zn8P-C<9gX;J@>yK4Ld-**6sGH#Y~
z^)huk@;OR2woO)z$+T!SCB?dUnXa#uIkgoAqyvrp2L^Uf)ygPGL$a?^7+Py+8vz;O
zi^O2pc#S4*venBMnMu8ItQ!1aN*uJW-gka}+|gy7G-WK>YwWMVUDwbF%M~+=yb{x^IjSESOn#oj?nSGeuU`M~1QALnStTp21
z;aD-js`MKJHl4a%N{z46qKpaIt*m$=5|MH2=H}*74wtX@0iUoN#L%gcRDd_{MRsUoA(cxN
zKypD5oHK`TY1dQ9aL^pFv$6EP(9QZmZluY31iyer~Zu1$B657a5T
zJJGG(^Mo6SF$`zmDTQ|L%O%{olAdEqE~lHn!TjP`4$?oKao(ZZ{k#}O&n+3r?Y29p
z(CJ=>Y1x+dO9f`ObyO=JIoiQcymo~uw%o!p+A?z#ipPRm>(x6ed+%d-fBJ{*oZmV?
zv_`V|cT$d!fJlCEVRlhygBgUdfOEjw{-u|Bd3-C
z;$B?~GSx+vGcx6>Bw48Gz}2RjdtVx
zTPUyTo}oet-mpwF(o6EZAN1qh;X`#Hay|0-t}^jb&|=czQTdpbxTc*vxzgwuuiWH4
z^D5p8MJOOTR{sE1am;KlE>?f0(LB;ulm96la_UdksnsP#kL*KJvw&Eoz}oiS&2fm~
zG8+4=_ll7Y*OlQTVKxsW&(CcJEr(ka-sI4a=?H_i9g1(vaxcH<{mB~7Ro79q$}8|w
z;6<9o)=uuwgBFwoq+#Tw#6^(zD1#r24@q+oSx{xT&hkGi6*22Q$Sr%?W7*q&kZm_4
zV#o0OENp@7rCJ4tUMDyPu0wW!;YwR0>c{%P@Dr+&O^(x(R(zDDR~oDY-*Is+?9&yh
zI{p)h60!;V_1R2(A=jaNcJ}O~Pv+yFsE)mY-FDo!)MwAtwA5oFh}9{?(&Q)e0}q)>
zCd*H_$8Xl;V!I}PmqI=Uo%f+;cPyB!FN~|Mmaw&+O2o-t+<~^#)VKFQxvIS?Kpoz)
z;WGKL{rm4bO}P|DItKzcbKzgTKQ(i1emB1QO1pcXE3l)75HLhr5jPTHy%0p($X@|V
z6)~OWzJScF9jM5TkPwuK8SP@qfGHfzi<~-l99Q{0CKr7=xdQ*xcM4w8)%S@Vi?^%s
zbovfqS~|^aHDUKa(wN7;89Df^VIOU{h^3QYE#lah;N5choz&Z-Tm$KRod@Xd&moV^-pgtuIkME)H9ugjjg5L;loI>n`e9b9qmggm`6QI(3?F>0P$$Gfk4Y
zT*12ZiZ;)j+3}X_$UvTh%F)zBUUk@;NBbO0+TO=cFOELc;W@d-Lh03RGQ?Jr8aNR&
zcE8y!I(&xb@~%#zYkqvLRG%WL@j>wKLeH^EP~Y_?8G|zQ?m%%?-n_NNJ#1x4TBtVq
z*3{GqJ+-eW>RH~gSNYP+Cd8{%XQ+YW-ov4I_<_ytOC}AOo6ui2+N;HTSRATuX0Z53
za15!$o_q5dayIRr|CIA{tgiED`4SYFroJx6Q!164!x-Y*7VPlrs~zgAEG7prUU>`E
z$G|15Ljul!WKNO^_^9nZeGgN?_x3#N?;Ji$h=aQ8nbJ@A3TwDhp`#nTwdD4*F;w0<
z3%|zLJj6w;9nuq7c$y7?N=H};9Qn)qBG)IyXo+iMfrf%tJe1@Ac-X({Kei6mS!VVE
zPT9Xq_iFJFyvTJKa8>nQLumm&-g$65?psyO-loqt&5F}%alKTupcDSpS7TFF1Dl&F
z)&f~sJ&G)vHaaIF#f#HTH=CRH_u|xZ?vVo$Y`NnM{HBHs63Rr*%KgOW?!(0d`=Amy
zGxe%ue-5OgHuB34X{C&cio=(v9xTzq$N|k>;ob&83O%benwZ#LT<=LlFSh+S`9`8sW{14nY|FCB7sCKs9lseXX)O`K|&9=@bfq>z=JKcrdZbP{51
zsnd7)LNIrteC5T0TZ{a+$gw{)6lk_y0m@t-<%@8b9laSUxHsc`Je~>1id~O5I|6vl
zmNO?w0gK)to2y$cdCZiKhvJ3&u{8iL-*<)zo7WI`XLJ!nupv64T0Hn{bV{792Ybsj
zJL6Iq6CaW>T|#B`%_Np?Z2#m*f5CN9Xq{v0vl%bR$G6Wby1A7KRL@!*g|3fJgCbR+
zN3Xj^aLXR8e
zXL$;i*T~$v{P(j12T&Zw%FY2)l!R{S3Xj-Bve(nFL_{b17W%9#o9PxfK37mz4XrmZ
zzrGRXAS~145&F+}DKsOi`3D47g&yN<506#f)n8sl8Sl*47S!Hio;6wbp>}D=-Vb44AnlKnY#X!KSrK;q+1k4jSsX}d$&v0|P~4ro
zU?-nefyZQT&%>#QcjQD~Aa8QQ?=D^NCoioEfA;}NvYWcXySyR;woyJlMhaEEI{ZdHTi?>@PP=vV5?
z*36MmRelK60%+k1#|1<UolSic_;y91*<-j-J{^nAzHz(xZXU-pSYR3N-}*lMu1)(FXO`(Dc)TxS#8plWf&i)VPc!=|Q;gh9ilnxdG-;KrECvlDK2F_@+p21@k{!A4es!^e%mjHC1zL-phjfYZ25(pE
z$+;~L&;!21aBK6tIePxC^V*(MdrxA!e1nB0m_X4IDPq`E3fCHDEDg}bP5It)^6x@>!^>SSBpmG+QKZxlz2$6bXsK4|%s
zD`zy%$rrkPu~!aqX#M$BQNJZQYi$|n!=Y?t(fG0ILWJu1I49y8y70F!%?wA9Ow-obC?NtEIgl@gm6fu845|m&K)?_^iZH!x^!?z4;p4tad&x`?vSY-9ie$_GOYZ
zbolinpN^`k^f*-=9Zw)f(+@IXF2}~-e~Y7@?Dq}(3@Lr-6`)
zTN>P6WaRep@|QceY7eSudu@61f&#YUz}oiQgLc%Xo|d{#?f3m{LfVLp?H-3OS4@mp
ziSpJKZU|Gd&hsssI(?O+ud#3K>G^vqoLddB>}|Yr~Brjrs;IqW7d&Q0+
zt8VIn*Gt6_!V|z8@u|2V
z>?myz27Jg-0GDsE7y0J)(Hj19Y~Ge+!TsnDfh}szFLdwpnT^+`h@AOYr55grcyqIB
z`_YgbxjL5l$*1+Wwm3^&3mOvbC?V%%pg!E5`kwu(CLYj-*o%DnmN2HzrvZ?Vz@z0O
zOC_1N+z}LmN-~GAus;JGeW=wPY9M=@h-QY^4U+^8jsexIp6P}aykPgc5#JqkewYGV
z0g(X!^U8wn>#5W*pI<2qINBJV{bAEv3-##IAKXr!`O;}uy(uV
z5n0v*USY<-$S7#l2aS1}KrG!|bE+_6LmL;WN$3VS$9ZS(ur**JQiqIHCW5dacwLt}
zKLQ=NuIk$815UwC{#**Ee3fCVib0S>(Q2EpVww_aI*K?hx{VHn`tF-7@0ItjZ6(^~
zqFJD9cb-z{DN_2a-9isORQ!p
zxLM6b^%#1s9Kl$!a8T`z>E$5I>gbBXuLd33+l}yGF0TLJ0E4dmM>S0+sbUyqG)_;q
z7njSniTDoh)Fz0izo^;l*_>Ua!WDr3QCUZJh
zUmzlOaOF8waJsN#SC4t}_ex2z1VxgwGT+%7y8fG%u{|Bw5=y~t%U~7AHCzKy
zzH+!OvQQXU8X34w^M~9xAZ0i)Rc!%0+3&1vU2m!RG^R`D@P^d7c+u&~s@@P^N2}x`
zT#Ysw6%rD`l;xrLti9WR1*4am-0DCkzqf~AS`6?A=8v}GB($dtWIpul==fQKt^(uV
zyv$!dm-2545{ovcT#9phUk-3l=ERh4WaLzwc-a&1O84|5P0Q<{`%_P
z)UxL~CB24&MrBb|%4K)bDcAxZMcFVB#if4pys-(U<@uVv-R~z#W*cx3?;gIDU&$Te
zU?-F<@zL4Y0=3;qK=|E%vfDN$9W%Pz2d*0xD5jxfo{amkKEs+T$2#r6Y=>`e=ir8~
z@j&FsL6$O&?DCa`UJ<_faa=o$7`i*q`LY}H*kNzu{9L4p_pjK_1U91AzYeYokH}p(
zQ#I6N@0iL^Ku}UhjIKhD{tZJKqBW2{VET4I0lBq;EbDk>W;xz~XH&JK`FOEbiq#?x
zg^!<~&5u((6r}0TrQGB3ER1#?i|P}nC-+9h{UcN_7m{0Y0&v2rg*cE=ckvkw
zx#fa<{{7+^9Tfs-YCk{f5b#9X95eYV5Vu?wf(PXuy1M#3>rUQLs&*(2=e>t|+3ix4
z^ZWTi+iny!^LX5Uof>~ey~?iT?OK}I)IsLZ`CVQ#wc#
zLU`*b2m#tc(y;+^FL(HxtQAgV%5X@KCGNjYfIH;Yp1n`O65itmqiXNWs#`)gZj<)c
zZleZ2<67$^LtbagG%hbUbg8E#uaIv9inOruQ~c?sxc65rJ)!;63+1f0u#a3;&=)`1
z6d|3F4aFDwZYB{iI$ag(GZhzo4^pWMrmCXqf}rpYlLlAhnXd=?|9(9|72=g+y}`(|
zhq?1*jr;69^JRfzhK(lHd(U47qQPBpT2o@=Xx#=LxGGcj_bbQw?+NM{3-Y4h>|@H9
zFNYs%$lUAZEMty`|Gr>1eeZu-%Y+O8LP{BbneuN1Sr04zzwX%kzucs_LZV!pyPOiL
UDQQIR+N97_(|uC=$nMqu0{vpyE&u=k

literal 0
HcmV?d00001

diff --git a/setup_env.py b/setup_env.py
index 9256324..8011872 100644
--- a/setup_env.py
+++ b/setup_env.py
@@ -41,6 +41,9 @@ SUPPORTED_HF_MODELS = {
     "tiiuae/Falcon3-1B-Instruct-1.58bit": {
         "model_name": "Falcon3-1B-Instruct-1.58bit",
     },
+    "microsoft/BitNet-b1.58-2B-4T": {
+        "model_name": "BitNet-b1.58-2B-4T",
+    },
 }
 
 SUPPORTED_QUANT_TYPES = {
@@ -161,6 +164,8 @@ def gen_code():
             run_command([sys.executable, "utils/codegen_tl1.py", "--model", "Llama3-8B-1.58-100B-tokens", "--BM", "256,128,256,128", "--BK", "128,64,128,64", "--bm", "32,64,32,64"], log_step="codegen")
         elif get_model_name() == "bitnet_b1_58-3B":
             run_command([sys.executable, "utils/codegen_tl1.py", "--model", "bitnet_b1_58-3B", "--BM", "160,320,320", "--BK", "64,128,64", "--bm", "32,64,32"], log_step="codegen")
+        elif get_model_name() == "BitNet-b1.58-2B-4T":
+            run_command([sys.executable, "utils/codegen_tl1.py", "--model", "bitnet_b1_58-3B", "--BM", "160,320,320", "--BK", "64,128,64", "--bm", "32,64,32"], log_step="codegen")
         else:
             raise NotImplementedError()
     else:
@@ -177,6 +182,8 @@ def gen_code():
             run_command([sys.executable, "utils/codegen_tl2.py", "--model", "Llama3-8B-1.58-100B-tokens", "--BM", "256,128,256,128", "--BK", "96,96,96,96", "--bm", "32,32,32,32"], log_step="codegen")
         elif get_model_name() == "bitnet_b1_58-3B":
             run_command([sys.executable, "utils/codegen_tl2.py", "--model", "bitnet_b1_58-3B", "--BM", "160,320,320", "--BK", "96,96,96", "--bm", "32,32,32"], log_step="codegen")
+        elif get_model_name() == "BitNet-b1.58-2B-4T":
+            run_command([sys.executable, "utils/codegen_tl2.py", "--model", "bitnet_b1_58-3B", "--BM", "160,320,320", "--BK", "96,96,96", "--bm", "32,32,32"], log_step="codegen")
         else:
             raise NotImplementedError()
 
@@ -222,4 +229,4 @@ if __name__ == "__main__":
     args = parse_args()
     Path(args.log_dir).mkdir(parents=True, exist_ok=True)
     logging.basicConfig(level=logging.INFO)
-    main()
+    main()
\ No newline at end of file