nvidia-apex, WIP

Anton Bolshakov 2022-11-23 10:08:49 +08:00
parent 5ba1f511e0
commit 829c6b24b0
No known key found for this signature in database
GPG key ID: 32BDCED870788F04
8 changed files with 117 additions and 56 deletions

View file

@@ -0,0 +1 @@
DIST cxxfilt-0.3.0.tar.gz 4806 BLAKE2B 369f10c547343dc9e4bf87edd4513531109b1792d5196da57fd0a5db9f0830bf5ada1f20dd7799a3a114400a3f03cbfc795c021d2ae763557e848abca4f7094e SHA512 87cf42c8ae7a2eeb74712b2d952d011ca1e2ac90e3d9c7d254eeef1dd88ceba1f25aff377852d384f73c38cd0183898f3ba9ccde0d9f09d03025c0262290704f

View file

@@ -0,0 +1,24 @@
# Copyright 1999-2022 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

EAPI=8

PYTHON_COMPAT=( python3_{10..11} )

inherit distutils-r1

DESCRIPTION="Python interface to c++filt / abi::__cxa_demangle"
HOMEPAGE="https://github.com/afq984/python-cxxfilt"
SRC_URI="mirror://pypi/${P:0:1}/${PN}/${P}.tar.gz"

LICENSE="BSD-2"
SLOT="0"
KEYWORDS="amd64 ~arm64 x86"
IUSE=""
RESTRICT="test"

RDEPEND=""
DEPEND="${RDEPEND}"
REQUIRED_USE="${PYTHON_REQUIRED_USE}"

#distutils_enable_tests pytest
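For context, the new dev-python/cxxfilt package wraps c++filt / abi::__cxa_demangle so C++ symbol names can be demangled from Python. A minimal usage sketch follows; cxxfilt.demangle() is the entry point documented by upstream, and the mangled name below is only an illustrative input.

# Minimal sketch: demangle a C++ symbol name with python-cxxfilt.
# The mangled string is an illustrative example input.
import cxxfilt

mangled = "_ZNSt6vectorIiSaIiEE9push_backERKi"
print(cxxfilt.demangle(mangled))
# -> std::vector<int, std::allocator<int> >::push_back(int const&)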

View file

@@ -0,0 +1 @@
DIST nvidia-apex-bin-22.8.zip 3247310 BLAKE2B ef1d8ff2933d5b9866b137645769e9f1ccda2091e1dfd73b5a86e6556ef5af927e652e36181b425e2d0406f8dcd51ce0a65be86eb4d2deb278cfaf83e8d63d43 SHA512 0b92db6858f075b007fc46ce794cb015d4ad018e787850d2c25e0669f4f39ef67d1efbf5baa14d139ef747fae94ffbbf1673c62652a038fcde983178701e740d

View file

@@ -0,0 +1,37 @@
# Copyright 1999-2022 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

EAPI=8

PYTHON_COMPAT=( python3_10 )

#inherit distutils-r1
inherit python-any-r1

#22.08 -> 22.8
MY_P="apex-${PV}-py3.10-linux-x86_64.egg"

DESCRIPTION="NVIDIA-maintained utilities to streamline mixed precision and distributed training in Pytorch"
HOMEPAGE="https://github.com/NVIDIA/apex"
SRC_URI="https://dev.pentoo.ch/~blshkv/distfiles/nvidia-${MY_P} -> ${P}.zip"

LICENSE=""
SLOT="0"
#KEYWORDS="amd64 ~arm64 x86"
IUSE=""

#[${PYTHON_USEDEP}]
RDEPEND=">=dev-python/tqdm-4.28.1
	>=dev-python/numpy-1.15.3
	>=dev-python/pyyaml-5.1
"
DEPEND="${RDEPEND}"

RESTRICT="test"

S=${WORKDIR}

src_install() {
	insinto "$(python_get_sitedir)/${MY_P}"
	doins -r "./"
}
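The -bin ebuild above drops the prebuilt egg into the interpreter's site-packages as a plain directory. One WIP caveat: a directory named *.egg is not automatically importable unless something (normally an easy-install.pth entry) puts it on sys.path. A small check like the one below, a sketch only, can confirm the result is usable; the egg path mirrors MY_P from the ebuild and is illustrative.

# Post-install sanity check (illustrative; not part of the ebuild).
# An .egg directory in site-packages is only importable once it is on sys.path.
import importlib.util
import os
import site
import sys

# Mirrors MY_P from the ebuild; adjust for the installed Python/apex version.
egg = os.path.join(site.getsitepackages()[0], "apex-22.8-py3.10-linux-x86_64.egg")

if os.path.isdir(egg) and egg not in sys.path:
    sys.path.insert(0, egg)

print("apex importable:", importlib.util.find_spec("apex") is not None)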

View file

@@ -1,2 +1 @@
DIST nvidia-apex-22.04-gh.tar.gz 769456 BLAKE2B 9c9270e5b19afa244784f0825a2c093820aab5e64f5621ea0350da6b127d14b734fcbf16982ff889a82b624842d982c0f447146a9efddd165dd59916988ee955 SHA512 347a67640dd1903442dacc59cb7c15efe299cafc3d7afd36ae2f1fbe4b7b08d23c18a5351035f62f3b7f47023331d37485a99cc0049ef8c788f27a32146d9e64
DIST nvidia-apex-22.08-gh.tar.gz 746501 BLAKE2B 7d882e551c14b7b3a6b463f7873d22603b55e957ffd792735ab05ba158706c61c492325f87fb5d3370cf96108a7a4ec9546efc93c78a3e96f16470e43b93cee8 SHA512 11291444f5038f6b1702e7bd2c65daa29746ada9add18d1af226ed7d3facd21c4fe4ba2e262936f6a95e3a84f41184edf2a1ff964394fc6eb7b23e76157dcdf5

View file

@@ -0,0 +1,11 @@
--- apex-22.08-dev/apex/normalization/fused_layer_norm.py.orig 2022-08-02 08:25:04.000000000 +0800
+++ apex-22.08-dev/apex/normalization/fused_layer_norm.py 2022-11-21 13:12:25.579343523 +0800
@@ -265,7 +265,7 @@
        super().__init__()
        global fused_layer_norm_cuda
-        fused_layer_norm_cuda = importlib.import_module("fused_layer_norm_cuda")
+        #fused_layer_norm_cuda = importlib.import_module("fused_layer_norm_cuda")
        if isinstance(normalized_shape, numbers.Integral):
            normalized_shape = (normalized_shape,)
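The patch above (referenced further down as ${FILESDIR}/disable_cuda.patch in the commented-out python_prepare) comments out the unconditional import of the compiled fused_layer_norm_cuda module, so apex stays importable when the CUDA extension is not built. A softer alternative, shown here only as a sketch and not what the commit does, would be to guard the import and fall back to None:

# Hypothetical alternative to the patch: tolerate a missing compiled extension
# instead of commenting the import out entirely.
import importlib

try:
    fused_layer_norm_cuda = importlib.import_module("fused_layer_norm_cuda")
except ImportError:
    fused_layer_norm_cuda = None  # CPU-only install; callers must handle None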

View file

@@ -1,37 +0,0 @@
# Copyright 1999-2022 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

EAPI=8

PYTHON_COMPAT=( python3_{10..11} )

inherit distutils-r1

#branch 22.04-dev
HASH_COMMIT="${PV}-dev"

DESCRIPTION="NVIDIA-maintained utilities to streamline mixed precision and distributed training in Pytorch"
HOMEPAGE="https://github.com/NVIDIA/apex"
SRC_URI="https://github.com/NVIDIA/apex/archive/${HASH_COMMIT}.tar.gz -> ${P}-gh.tar.gz"

LICENSE=""
SLOT="0"
KEYWORDS="amd64 ~arm64 x86"
IUSE=""

RDEPEND=">=dev-python/tqdm-4.28.1[${PYTHON_USEDEP}]
	>=dev-python/numpy-1.15.3[${PYTHON_USEDEP}]
	>=dev-python/pyyaml-5.1[${PYTHON_USEDEP}]"
DEPEND="${RDEPEND}"
REQUIRED_USE="${PYTHON_REQUIRED_USE}"

S="${WORKDIR}/apex-${HASH_COMMIT}"

#src_compile(){
# pip install -v --disable-pip-version-check --no-cache-dir ./
#pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
#}

python_compile() {
	distutils-r1_python_compile --cpp_ext --cuda_ext
}

View file

@@ -3,11 +3,12 @@
EAPI=8
DISTUTILS_IN_SOURCE_BUILD=1
PYTHON_COMPAT=( python3_{10..11} )
inherit distutils-r1
#brach 22.04-dev
#git branch
HASH_COMMIT="${PV}-dev"
DESCRIPTION="NVIDIA-maintained utilities to streamline mixed precision and distributed training in Pytorch"
@@ -17,7 +18,9 @@ SRC_URI="https://github.com/NVIDIA/apex/archive/${HASH_COMMIT}.tar.gz -> ${P}-gh
LICENSE=""
SLOT="0"
KEYWORDS="amd64 ~arm64 x86"
IUSE="cuda"
#FIXME: can't use global "cuda"
IUSE="cuda_ext"
RDEPEND=">=dev-python/tqdm-4.28.1[${PYTHON_USEDEP}]
>=dev-python/numpy-1.15.3[${PYTHON_USEDEP}]
@@ -25,27 +28,49 @@ RDEPEND=">=dev-python/tqdm-4.28.1[${PYTHON_USEDEP}]
DEPEND="${RDEPEND}"
REQUIRED_USE="${PYTHON_REQUIRED_USE}"
RESTRICT="test"
S="${WORKDIR}/apex-${HASH_COMMIT}"
#python_prepare() {
# if use !cuda_ext; then
# einfo "CUDA disabled"
#sed -i -e "s|fused_layer_norm_cuda = importlib|\#fused_layer_norm_cuda = importlib|" apex/normalization/fused_layer_norm.py || die
# eapply ${FILESDIR}/disable_cuda.patch
# fi
# eapply_user
#}
#python_prepare_all() {
# export TORCH_CUDA_ARCH_LIST="compute capability"
# python_setup
# esetup.py
# distutils-r1_python_prepare_all
#}
#If you wish to cross-compile for a single specific architecture,
#export TORCH_CUDA_ARCH_LIST="compute capability" before running setup.py.
python_compile() {
	if use cuda; then
		distutils-r1_python_compile --cpp_ext --cuda_ext
python_configure_all() {
# export MAX_JOBS=1
	if use cuda_ext; then
# export TORCH_CUDA_ARCH_LIST="compute capability"
		addpredict "/dev/nvidiactl"
		DISTUTILS_ARGS=( --cpp_ext --cuda_ext )
	fi
}

python_install() {
	if use cuda; then
		# disable gpu check
		# required for cross-compile and sandbox
		export TORCH_CUDA_ARCH_LIST="compute capability"
		distutils-r1_python_install --cpp_ext --cuda_ext
	fi
}
#python_compile_all() {
# export TORCH_CUDA_ARCH_LIST="compute capability"
# esetup.py
#}
#FIXME:
#https://github.com/NVIDIA/apex/issues/161
# "No module named 'fused_layer_norm_cuda'"
#python_compile() {
# export TORCH_CUDA_ARCH_LIST="compute capability"
# breaks with parallel build
# need to avoid dropping .so plugins into
# build-lib, which breaks tests
# esetup.py build_ext --inplace
# --cpp_ext --cuda_ext build_ext --inplace -j1
# TORCH_CUDA_ARCH_LIST="compute capability" distutils-r1_python_compile -j1
# distutils-r1_python_compile -j1
#}