diff --git a/CMakeLists.txt b/CMakeLists.txt index b7a0c99..6ddaa51 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,6 +32,10 @@ if (GGML_BITNET_X86_TL2) add_compile_definitions(GGML_BITNET_X86_TL2) endif() +if (CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + add_compile_options(-fpermissive) +endif() + find_package(Threads REQUIRED) add_subdirectory(src) diff --git a/setup_env.py b/setup_env.py index b9bf5fc..8a9c4b4 100644 --- a/setup_env.py +++ b/setup_env.py @@ -34,7 +34,6 @@ COMPILER_EXTRA_ARGS = { OS_EXTRA_ARGS = { "Windows":["-T", "ClangCL"], - "Linux": ["-DCMAKE_C_COMPILER=clang", "-DCMAKE_CXX_COMPILER=clang++"] } ARCH_ALIAS = { diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9cead70..bac8459 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -4,7 +4,7 @@ set(GGML_SOURCES_BITNET ggml-bitnet-lut.cpp) include_directories(3rdparty/llama.cpp/ggml/include) -if ((NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang") OR -(NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")) - message(FATAL_ERROR "Clang is required for Bitnet.cpp compilation") -endif() \ No newline at end of file +if (NOT (CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID STREQUAL "GNU") OR + NOT (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU")) + message(FATAL_ERROR "Clang or GCC is required for Bitnet.cpp compilation") +endif() diff --git a/utils/codegen_tl2.py b/utils/codegen_tl2.py index 44d2418..4d94081 100644 --- a/utils/codegen_tl2.py +++ b/utils/codegen_tl2.py @@ -105,7 +105,7 @@ inline int32_t partial_max_reset(int32_t bs, void* lut_scales_) {\n\ template\n\ inline int32_t three_lut_ctor(int8_t* qlut, bitnet_float_type* b, bitnet_float_type* lut_scales) {\n\ #if defined __AVX2__\n\ - __m256 vec_lut[16];\n\ + __m256i vec_lut[16];\n\ const __m256i vec_bi = _mm256_set_epi32(84, 72, 60, 48, 36, 24, 12, 0);\n\ float scales = *lut_scales;\n\ __m256i shuffle_mask = _mm256_set_epi8(\n\ @@ -191,7 +191,7 @@ inline int32_t three_lut_ctor(int8_t* qlut, bitnet_float_type* b, bitnet_float_t template\n\ inline int32_t two_lut_ctor(int8_t* qlut, bitnet_float_type* b, bitnet_float_type* lut_scales) {\n\ #if defined __AVX2__\n\ - __m256 vec_lut[16];\n\ + __m256i vec_lut[16];\n\ const __m256i vec_bi = _mm256_set_epi32(56, 48, 40, 32, 24, 16, 8, 0);\n\ float scales = *lut_scales;\n\ __m256i shuffle_mask = _mm256_set_epi8(\n\