mirror of
https://github.com/microsoft/BitNet.git
synced 2026-05-03 11:20:36 +00:00
[feat] I2S kernels for weight & activation parallel on Intel & ARM machine; [feat] I2S GEMV & GEMM(llama.cpp); [feat] quantize activation & dequantize embedding(llama.cpp); [fix] compile bug: cannot define __ARM_FEATURE_DOTPROD(llama.cpp)
This commit is contained in:
Vendored
+1
-1
Submodule 3rdparty/llama.cpp updated: 40ed0f2902...0f0e7daec2
@@ -0,0 +1,23 @@
|
||||
/*
 * Compile-time block-size configuration.
 *
 * Selects ROW_BLOCK_SIZE, COL_BLOCK_SIZE and PARALLEL_SIZE from the target's
 * SIMD feature macros and from whether ACT_PARALLEL is defined.
 *
 * NOTE(review): the code consuming these macros is not visible here;
 * presumably the I2S GEMV/GEMM kernels -- confirm against the kernel sources.
 * NOTE(review): no include guard is visible in this view.
 */
/*
 * Defined unconditionally, so every `#if defined(ACT_PARALLEL)` below takes
 * its first branch; the #else values apply only if this line is removed.
 * NOTE(review): presumably selects an activation-parallel scheme over a
 * weight-parallel one -- confirm.
 */
#define ACT_PARALLEL
|
||||
/* x86 targets: any one of AVX, AVX2, AVX-512F or SSSE3 is enabled. */
#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__)
|
||||
#if defined(ACT_PARALLEL)
|
||||
/* x86 values when ACT_PARALLEL is defined. */
#define ROW_BLOCK_SIZE 4
|
||||
#define COL_BLOCK_SIZE 128
|
||||
#define PARALLEL_SIZE 4
|
||||
#else
|
||||
/* x86 values when ACT_PARALLEL is not defined. */
#define ROW_BLOCK_SIZE 32
|
||||
#define COL_BLOCK_SIZE 4
|
||||
#define PARALLEL_SIZE 4
|
||||
#endif
|
||||
/* ARM targets with NEON, checked only when none of the x86 macros is set. */
#elif defined(__ARM_NEON)
|
||||
#if defined(ACT_PARALLEL)
|
||||
/* NEON values when ACT_PARALLEL is defined. */
#define ROW_BLOCK_SIZE 8
|
||||
#define COL_BLOCK_SIZE 64
|
||||
#define PARALLEL_SIZE 8
|
||||
#else
|
||||
/* NEON values when ACT_PARALLEL is not defined. */
#define ROW_BLOCK_SIZE 16
|
||||
#define COL_BLOCK_SIZE 4
|
||||
#define PARALLEL_SIZE 4
|
||||
#endif
|
||||
/*
 * No final #else: on a target matching neither test above, ROW_BLOCK_SIZE,
 * COL_BLOCK_SIZE and PARALLEL_SIZE are all left undefined.
 */
#endif
|
||||
|
||||
+928
-235
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user