[feat] I2S kernels for weight-parallel and activation-parallel execution on Intel and ARM machines; [feat] I2S GEMV & GEMM (llama.cpp); [feat] quantize activation & dequantize embedding (llama.cpp); [fix] compile bug: cannot define __ARM_FEATURE_DOTPROD (llama.cpp)

This commit is contained in:
deva100
2025-11-19 07:35:05 +00:00
parent 404980eeca
commit 112f853414
3 changed files with 952 additions and 236 deletions
+23
View File
@@ -0,0 +1,23 @@
/*
 * Compile-time block-size parameters, chosen from the target's SIMD feature
 * macros and from whether ACT_PARALLEL is defined.
 *
 * ACT_PARALLEL is defined unconditionally right here, so the "#else" halves
 * below are only reached if that definition is removed by hand.
 * NOTE(review): presumably ACT_PARALLEL selects an activation-parallel kernel
 * layout and its absence a weight-parallel one -- confirm against the kernels
 * that consume these macros.
 *
 * When neither an x86 SIMD macro (SSSE3 / AVX / AVX2 / AVX512F) nor
 * __ARM_NEON is present, none of the three size macros is defined.
 */
#define ACT_PARALLEL

#if defined(__SSSE3__) || defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__)
/* x86 targets with SSSE3 or any AVX level. */
#  ifdef ACT_PARALLEL
#    define PARALLEL_SIZE  4
#    define ROW_BLOCK_SIZE 4
#    define COL_BLOCK_SIZE 128
#  else
#    define PARALLEL_SIZE  4
#    define ROW_BLOCK_SIZE 32
#    define COL_BLOCK_SIZE 4
#  endif
#elif defined(__ARM_NEON)
/* ARM targets with NEON. */
#  ifdef ACT_PARALLEL
#    define PARALLEL_SIZE  8
#    define ROW_BLOCK_SIZE 8
#    define COL_BLOCK_SIZE 64
#  else
#    define PARALLEL_SIZE  4
#    define ROW_BLOCK_SIZE 16
#    define COL_BLOCK_SIZE 4
#  endif
#endif
+928 -235
View File
File diff suppressed because it is too large Load Diff