Author: Michael R. Crusoe <crusoe@debian.org>
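Description: Support non-x86 systems via SIMDe (libsimde-dev)
 When the build sets SIMD=1, every RADULS variant (SSE2, SSE4.1, AVX, AVX2)
 is compiled with its ISA-specific flag. Otherwise only raduls_sse2.o is
 built, without -msse2. On non-x86 targets the CPUID probing is compiled out
 and the sort function is fixed to RadixSortMSD_SSE2.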
--- kmc.orig/Makefile
+++ kmc/Makefile
@@ -53,11 +53,16 @@
 
 	LIB_KMC_CORE = $(OUT_BIN_DIR)/libkmc_core.mac.a
 else
+ifeq (1,$(SIMD)) # SIMD=1: build every RADULS variant listed below
 	RADULS_OBJS = \
 	$(KMC_MAIN_DIR)/raduls_sse2.o \
 	$(KMC_MAIN_DIR)/raduls_sse41.o \
 	$(KMC_MAIN_DIR)/raduls_avx2.o \
 	$(KMC_MAIN_DIR)/raduls_avx.o
+else # SIMD is anything other than 1: only the SSE2-named variant is built
+	RADULS_OBJS = \
+	$(KMC_MAIN_DIR)/raduls_sse2.o
+endif # SIMD
 
 	LIB_KMC_CORE = $(OUT_BIN_DIR)/libkmc_core.a
 endif
@@ -92,6 +97,7 @@
 $(KMC_CLI_OBJS) $(KMC_CORE_OBJS) $(KMC_DUMP_OBJS) $(KMC_API_OBJS) $(KFF_OBJS) $(KMC_TOOLS_OBJS): %.o: %.cpp
 	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@
 
+ifeq (1,$(SIMD)) # SIMD=1: each RADULS variant is compiled with its own ISA flag
 $(KMC_MAIN_DIR)/raduls_sse2.o: $(KMC_MAIN_DIR)/raduls_sse2.cpp
 	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -msse2 -c $< -o $@
 $(KMC_MAIN_DIR)/raduls_sse41.o: $(KMC_MAIN_DIR)/raduls_sse41.cpp
@@ -100,6 +106,10 @@
 	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -mavx -c $< -o $@
 $(KMC_MAIN_DIR)/raduls_avx2.o: $(KMC_MAIN_DIR)/raduls_avx2.cpp
 	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -mavx2 -c $< -o $@
+else # otherwise raduls_sse2.cpp is compiled with the common flags only (no -msse2)
+$(KMC_MAIN_DIR)/raduls_sse2.o: $(KMC_MAIN_DIR)/raduls_sse2.cpp
+	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@
+endif # SIMD
 
 $(LIB_KMC_CORE): $(KMC_CORE_OBJS) $(RADULS_OBJS) $(KMC_API_OBJS) $(KFF_OBJS)
 	-mkdir -p $(OUT_INCLUDE_DIR)
--- kmc.orig/kmc_core/cpu_info.cpp
+++ kmc/kmc_core/cpu_info.cpp
@@ -37,6 +37,7 @@
 	string vendor, brand;
 	void cpuid(int *result, int function_id) const
 	{
+#if defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86) // CPUID is x86-only; _M_X64/_M_IX86 keep the MSVC branch below reachable
 #ifdef _MSC_VER
 		__cpuidex(result, function_id, 0);
 
@@ -52,10 +53,12 @@
 		__asm__("cpuid\n\t"
 			: "=a" (result[0]), "=b" (result[1]), "=c" (result[2]), "=d" (result[3]) : "0" (function_id), "c"(0));
 #endif  
+#endif // x86: on other targets cpuid() is a no-op and leaves result untouched
 	}
 
 	CpuInfoImpl()
 	{
+#if defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86) // feature probing below relies on CPUID
 		array<int, 4> cpui = { -1 };
 		cpuid(cpui.data(), 0);
 		int nIds_ = cpui[0];
@@ -89,6 +92,9 @@
 			std::bitset<32> EBX = data_[7][1];
 			avx2 = EBX[5];
 		}
+#else // non-x86: there is no CPUID to probe
+		sse2=true; // the only capability flag raised on non-x86 targets
+#endif // x86
 	}
 
 	const string& GetVendor() const
@@ -144,4 +150,4 @@
 bool CCpuInfo::AVX_Enabled() { return cpu_info_impl.avx; }
 bool CCpuInfo::AVX2_Enabled() { return cpu_info_impl.avx2; }
 
-// ***** EOF
\ No newline at end of file
+// ***** EOF
--- kmc.orig/kmc_core/intr_copy.h
+++ kmc/kmc_core/intr_copy.h
@@ -15,8 +15,8 @@
 #include <intrin.h>
 #endif
 
-#include <emmintrin.h>
-#include <immintrin.h>
+#define SIMDE_ENABLE_NATIVE_ALIASES // ask SIMDe to also provide the native _mm_* names
+#include <simde/x86/sse2.h> // stands in for <emmintrin.h> and <immintrin.h>
 #include "critical_error_handler.h"
 
 #ifndef _WIN32
@@ -32,7 +32,7 @@
 	__int64* src = (__int64 *)_src;
 
 	for (unsigned i = 0; i < size; ++i)
-		_mm_stream_si64(dest + i, src[i]);
+		simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, dest + i), src[i]); // SIMDe replacement for _mm_stream_si64; the cast bridges __int64* to int64_t*
 }
 
 
@@ -46,7 +46,7 @@
 		__int64* src = (__int64*)_src;
 
 		for (unsigned i = 0; i < SIZE; ++i)
-			_mm_stream_si64(dest + i, src[i]);
+			simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, dest + i), src[i]); // SIMDe replacement for _mm_stream_si64; the cast bridges __int64* to int64_t*
 	}
 };
 
@@ -95,4 +95,4 @@
 
 #endif
 
-// ***** EOF
\ No newline at end of file
+// ***** EOF
--- kmc.orig/kmc_core/kmc.h
+++ kmc/kmc_core/kmc.h
@@ -1523,6 +1523,8 @@
 #ifdef __APPLE__
 	sort_func = RadixSort::RadixSortMSD<CKmer<SIZE>, SIZE>;
 	CSmallSort<SIZE>::Adjust(384);
+#elif !defined(__x86_64__) && !defined(__i386__) // non-x86: skip the CPU vendor/brand based selection in the #else branch
+	sort_func = RadulsSort::RadixSortMSD_SSE2<CKmer<SIZE>>; // chosen unconditionally on these targets
 #else	
 	auto proc_name = CCpuInfo::GetBrand();
 	bool is_intel = CCpuInfo::GetVendor() == "GenuineIntel";
--- kmc.orig/kmc_core/raduls.h
+++ kmc/kmc_core/raduls.h
@@ -24,6 +24,7 @@
 	template<typename KMER_T>
 	void RadixSortMSD_SSE2(KMER_T* kmers, KMER_T* tmp, uint64 n_recs, uint32 byte, uint32 n_threads, CMemoryPool* pmm_radix_buf);
 
+#if defined(__x86_64__) || defined(__i386__) // the variants below are declared for x86 targets only; RadixSortMSD_SSE2 above is always declared
 	template<typename KMER_T>
 	void RadixSortMSD_SSE41(KMER_T* kmers, KMER_T* tmp, uint64 n_recs, uint32 byte, uint32 n_threads, CMemoryPool* pmm_radix_buf);
 
@@ -32,8 +33,9 @@
 
 	template<typename KMER_T>
 	void RadixSortMSD_AVX2(KMER_T* kmers, KMER_T* tmp, uint64 n_recs, uint32 byte, uint32 n_threads, CMemoryPool* pmm_radix_buf);
+#endif // x86-only declarations
 }
 
 #endif // RADULS_H
 
-// ***** EOF
\ No newline at end of file
+// ***** EOF
--- kmc.orig/kmc_core/raduls_impl.h
+++ kmc/kmc_core/raduls_impl.h
@@ -759,7 +759,7 @@
 #define RADULS_RADIX_SORT_FUNNAME RadixSortMSD_AVX
 #elif defined(__SSE4_1__)
 #define RADULS_RADIX_SORT_FUNNAME RadixSortMSD_SSE41
-#elif defined(__SSE2__)
+#else // fallback when none of the earlier ISA checks matched, so the name is defined even without __SSE2__
 #define RADULS_RADIX_SORT_FUNNAME RadixSortMSD_SSE2
 #endif
 
@@ -798,4 +798,4 @@
 
 #endif
 
-// ***** EOF
\ No newline at end of file
+// ***** EOF
--- kmc.orig/kmc_core/splitter.cpp
+++ kmc/kmc_core/splitter.cpp
@@ -361,7 +361,7 @@
 				if (!both_strands && is_rev_comp) //if read is reversed and kmc was run to count all (not only canonical) kmers read must be transformed back
 				{
 					//static const char rev_maping[] = "=TGMCRSVAWYHKDBN";
-					static const char rev_maping[] = { -1, 3, 2, -1, 1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1 };// "=TGMCRSVAWYHKDBN";
+					static const int rev_maping[] = { -1, 3, 2, -1, 1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1 };// "=TGMCRSVAWYHKDBN"; int rather than char, presumably because plain char can be unsigned and then cannot hold -1
 					uint32 n_bytes = l_seq / 2;
 					uint64_t pos_after = pos + l_seq;
 					pos = pos_after;
@@ -380,7 +380,7 @@
 				}
 				else
 				{
-					static const char maping[] = { -1, 0, 1, -1, 2, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1 };//"=ACMGRSVTWYHKDBN";
+					static const int maping[] = { -1, 0, 1, -1, 2, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1 };//"=ACMGRSVTWYHKDBN"; int rather than char, presumably because plain char can be unsigned and then cannot hold -1
 					uint32 n_bytes = l_seq / 2;
 					for (uint32_t ii = 0; ii < n_bytes; ++ii)
 					{
@@ -1043,4 +1043,4 @@
 template class CWSmallKSplitter<uint32>;
 template class CWSmallKSplitter<uint64>;
 
-// ***** EOF
\ No newline at end of file
+// ***** EOF
