hashkill: fix kernel build for NVIDIA GPUs with low local memory

This commit is contained in:
Yury Martynov 2019-11-09 16:27:42 +03:00
parent 53c11c8173
commit 119bc3ba08
No known key found for this signature in database
GPG key ID: EBE62DD0CCEAE19E
2 changed files with 211 additions and 20 deletions

View file

@ -1,6 +1,6 @@
diff -ur a/src/kernels/compiler/amd-compiler.c b/src/kernels/compiler/amd-compiler.c
--- a/src/kernels/compiler/amd-compiler.c 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/compiler/amd-compiler.c 2019-11-08 16:06:56.030308820 +0300
+++ b/src/kernels/compiler/amd-compiler.c 2019-11-09 07:35:45.789677199 +0300
@@ -294,7 +294,7 @@
if( binary_sizes[j] != 0 )
{
@ -21,7 +21,7 @@ diff -ur a/src/kernels/compiler/amd-compiler.c b/src/kernels/compiler/amd-compil
}
diff -ur a/src/kernels/compiler/compiler.h b/src/kernels/compiler/compiler.h
--- a/src/kernels/compiler/compiler.h 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/compiler/compiler.h 2019-11-08 16:07:08.557195893 +0300
+++ b/src/kernels/compiler/compiler.h 2019-11-09 07:35:45.789677199 +0300
@@ -5,6 +5,7 @@
#include <stdlib.h>
#include <string.h>
@ -32,8 +32,39 @@ diff -ur a/src/kernels/compiler/compiler.h b/src/kernels/compiler/compiler.h
#include "ocl-base.h"
diff -ur a/src/kernels/compiler/nvidia-compiler.c b/src/kernels/compiler/nvidia-compiler.c
--- a/src/kernels/compiler/nvidia-compiler.c 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/compiler/nvidia-compiler.c 2019-11-08 16:07:31.381169750 +0300
@@ -183,25 +183,25 @@
+++ b/src/kernels/compiler/nvidia-compiler.c 2019-11-09 09:25:28.225931194 +0300
@@ -58,7 +58,7 @@
platform = platforms[i];
break;
}
- //printf("Platform found :%s\n",pbuf);
+ printf("Platform found :%s\n",pbuf);
}
if( platform == (cl_platform_id)NULL )
@@ -114,6 +114,9 @@
char pbuf[100];
err = _clGetDeviceInfo( devices[i], CL_DEVICE_NAME, sizeof(pbuf),pbuf, NULL );
checkErr( "clGetDeviceInfo", err );
+ cl_ulong local_mem;
+ err = _clGetDeviceInfo( devices[i], CL_DEVICE_LOCAL_MEM_SIZE, sizeof(local_mem), &local_mem, NULL );
+ checkErr( "clGetDeviceInfo", err );
char flags[100];
if (optdisable==1) sprintf(flags,"%s -cl-nv-maxrregcount=64 ",buildparams);
@@ -157,6 +160,11 @@
printf("%s: flags = %s\n",filename,flags);
break;
}
+
+ if (local_mem == 16384) {
+ sprintf(flags,"%s -DLOCMEM16K",flags);
+ }
+
char *eflags="";
err = _clBuildProgramNoErr( program, 1, &devices[i], flags, NULL, NULL );
if (err!=CL_SUCCESS)
@@ -183,25 +191,25 @@
switch (smiter)
{
case 0:
@ -66,7 +97,7 @@ diff -ur a/src/kernels/compiler/nvidia-compiler.c b/src/kernels/compiler/nvidia-
break;
}
}
@@ -288,25 +288,25 @@
@@ -288,25 +296,25 @@
switch (smiter)
{
case 0:
@ -99,9 +130,157 @@ diff -ur a/src/kernels/compiler/nvidia-compiler.c b/src/kernels/compiler/nvidia-
break;
}
free(ofname);
@@ -437,6 +445,9 @@
char pbuf[100];
err = _clGetDeviceInfo( devices[i], CL_DEVICE_NAME, sizeof(pbuf),pbuf, NULL );
checkErr( "clGetDeviceInfo", err );
+ cl_ulong local_mem;
+ err = _clGetDeviceInfo( devices[i], CL_DEVICE_LOCAL_MEM_SIZE, sizeof(local_mem), &local_mem, NULL );
+ checkErr( "clGetDeviceInfo", err );
char flags[100];
if (optdisable==1) sprintf(flags,"%s -cl-nv-maxrregcount=64 ",buildparams);
@@ -469,6 +480,11 @@
sprintf(flags,"%s -cl-nv-arch sm_35 ",flags);
break;
}
+
+ if (local_mem == 16384) {
+ sprintf(flags,"%s -DLOCMEM16K",flags);
+ }
+
char *eflags="";
err = _clBuildProgramNoErr( program, 1, &devices[i], flags, NULL, NULL );
if (err!=CL_SUCCESS)
diff -ur a/src/kernels/nvidia_bfunix.cl b/src/kernels/nvidia_bfunix.cl
--- a/src/kernels/nvidia_bfunix.cl 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/nvidia_bfunix.cl 2019-11-09 07:46:57.205147022 +0300
@@ -1,3 +1,4 @@
+#ifndef LOCMEM16K
#ifndef SM10
#define GGI (get_global_id(0))
#define GLI (get_local_id(0))
@@ -618,4 +619,5 @@
}
-#endif
\ No newline at end of file
+#endif
+#endif
diff -ur a/src/kernels/nvidia_msoffice_old.cl b/src/kernels/nvidia_msoffice_old.cl
--- a/src/kernels/nvidia_msoffice_old.cl 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/nvidia_msoffice_old.cl 2019-11-09 09:19:13.545452416 +0300
@@ -1,3 +1,4 @@
+#ifndef LOCMEM16K
#ifndef SM10
#define rotate(a,b) ((a) << (b)) + ((a) >> (32-(b)))
@@ -955,4 +956,5 @@
}
-#endif
\ No newline at end of file
+#endif
+#endif
diff -ur a/src/kernels/nvidia_msoffice_old_md5.cl b/src/kernels/nvidia_msoffice_old_md5.cl
--- a/src/kernels/nvidia_msoffice_old_md5.cl 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/nvidia_msoffice_old_md5.cl 2019-11-09 11:01:42.330990880 +0300
@@ -1,3 +1,4 @@
+#ifndef LOCMEM16K
#ifndef SM10
#define rotate(a,b) ((a) << (b)) + ((a) >> (32-(b)))
@@ -889,3 +890,4 @@
}
#endif
+#endif
diff -ur a/src/kernels/nvidia_pdf2.cl b/src/kernels/nvidia_pdf2.cl
--- a/src/kernels/nvidia_pdf2.cl 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/nvidia_pdf2.cl 2019-11-09 12:38:11.964090661 +0300
@@ -1,3 +1,4 @@
+#ifndef LOCMEM16K
#ifndef SM10
#define rotate(a,b) ((a) << (b)) + ((a) >> (32-(b)))
@@ -544,4 +545,6 @@
}
-#endif
\ No newline at end of file
+#endif
+#endif
+
diff -ur a/src/kernels/nvidia_pdf3.cl b/src/kernels/nvidia_pdf3.cl
--- a/src/kernels/nvidia_pdf3.cl 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/nvidia_pdf3.cl 2019-11-09 12:45:26.626342501 +0300
@@ -1,3 +1,4 @@
+#ifndef LOCMEM16K
#ifndef SM10
#define rotate(a,b) ((a) << (b)) + ((a) >> (32-(b)))
@@ -865,4 +866,5 @@
}
-#endif
\ No newline at end of file
+#endif
+#endif
diff -ur a/src/kernels/nvidia_pdf4.cl b/src/kernels/nvidia_pdf4.cl
--- a/src/kernels/nvidia_pdf4.cl 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/nvidia_pdf4.cl 2019-11-09 12:45:48.549791646 +0300
@@ -1,3 +1,4 @@
+#ifndef LOCMEM16K
#ifndef SM10
#define rotate(a,b) ((a) << (b)) + ((a) >> (32-(b)))
@@ -880,4 +881,5 @@
}
-#endif
\ No newline at end of file
+#endif
+#endif
diff -ur a/src/kernels/nvidia_pdf5.cl b/src/kernels/nvidia_pdf5.cl
--- a/src/kernels/nvidia_pdf5.cl 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/nvidia_pdf5.cl 2019-11-09 12:46:11.547164822 +0300
@@ -1,3 +1,4 @@
+#ifndef LOCMEM16K
#ifndef SM10
#define rotate(a,b) ((a) << (b)) + ((a) >> (32-(b)))
@@ -283,4 +284,5 @@
}
-#endif
\ No newline at end of file
+#endif
+#endif
diff -ur a/src/kernels/nvidia_pdf6.cl b/src/kernels/nvidia_pdf6.cl
--- a/src/kernels/nvidia_pdf6.cl 2014-02-04 14:36:40.000000000 +0400
+++ b/src/kernels/nvidia_pdf6.cl 2019-11-09 12:46:36.214419879 +0300
@@ -1,3 +1,4 @@
+#ifndef LOCMEM16K
#ifndef SM10
#define GGI (get_global_id(0))
#define GLI (get_local_id(0))
@@ -2647,4 +2648,5 @@
-#endif
\ No newline at end of file
+#endif
+#endif
diff -ur a/src/plugins/a51.c b/src/plugins/a51.c
--- a/src/plugins/a51.c 2014-02-04 14:36:40.000000000 +0400
+++ b/src/plugins/a51.c 2019-11-08 14:23:59.503194553 +0300
+++ b/src/plugins/a51.c 2019-11-09 07:35:45.789677199 +0300
@@ -21,6 +21,7 @@
#include <stdio.h>
#include <unistd.h>

View file

@ -3,7 +3,7 @@
EAPI=7
inherit autotools toolchain-funcs pax-utils
inherit autotools toolchain-funcs pax-utils flag-o-matic
DESCRIPTION="Multi-threaded password recovery tool with multi-GPU support"
HOMEPAGE="https://github.com/gat3way/hashkill"
@ -18,7 +18,7 @@ LICENSE="GPL-2 public-domain"
#KEYWORDS="~amd64 ~x86"
SLOT="0"
IUSE="video_cards_amdgpu video_cards_nvidia opencl +json pax_kernel"
IUSE="video_cards_amdgpu video_cards_nvidia +opencl +json pax_kernel"
REQUIRED_USE="
video_cards_amdgpu? ( opencl )
video_cards_nvidia? ( opencl )
@ -57,6 +57,18 @@ src_prepare() {
-e "s/AC_INIT(hashkill, \(.*\),/AC_INIT(hashkill, ${PV},/" \
configure.ac || die
# do not add random CFLAGS
sed -i \
-e "s/ -O3//g" \
src/Makefile.am src/Makefile.in \
src/plugins/Makefile || die
#the following might fail if gcc is built with USE="multislot"
if has_version sys-devel/gcc[-lto]; then
einfo "Warning: compiling without LTO optimisation"
sed -i 's/ -flto -fwhole-program//g' src/Makefile || die
fi
if use pax_kernel && use opencl; then
sed -i \
-e "s|amd-compiler$|amd-compiler \n\t\t paxctl -m amd-compiler |g" \
@ -69,16 +81,11 @@ src_prepare() {
}
src_configure() {
filter-flags -O2
econf \
$(use_with json) \
$(usex video_cards_amdgpu '' '--disable-amd-ocl') \
$(usex video_cards_nvidia '' '--disable-nv-ocl')
#the following might fail if gcc is built with USE="multislot"
if has_version sys-devel/gcc[-lto]; then
einfo "Warning: compiling without LTO optimisation"
sed -i 's/ -flto -fwhole-program//g' src/Makefile || die
fi
$(use_enable video_cards_amdgpu amd-ocl) \
$(use_enable video_cards_nvidia nv-ocl)
}
src_compile() {
@ -91,10 +98,10 @@ src_compile() {
addwrite /dev/ati
fi
# Without -j1 param you can get random errors while building.
# [hashkill] (../../ocl-base.c:312) clCreateContextFromType: CL_DEVICE_NOT_AVAILABLE
# Don't remove it
emake -j1 CC="$(tc-getCC)"
# Build speed depends heavily on your hardware.
# Without the -j1 param you may get random screen freezes and errors during the build:
# * [hashkill] (../../ocl-base.c:312) clCreateContextFromType: CL_DEVICE_NOT_AVAILABLE
emake CC="$(tc-getCC)" -j1
}
src_install() {
@ -110,3 +117,8 @@ src_test() {
cd tests
./test.sh || die
}
# Post-install hook: emits two warning lines reminding the user to run
# `gpasswd -d portage video` (i.e. remove the portage user from the video
# group) once installation has finished.
# NOTE(review): presumably the portage user was added to the video group
# beforehand so the build could access the GPU — confirm against the
# package's build instructions.
pkg_postinst() {
ewarn " ... # after installing:"
ewarn " ~$ sudo gpasswd -d portage video\n"
}