nvidia-apex, WIP

Anton Bolshakov 2022-11-23 10:08:49 +08:00
parent 5ba1f511e0
commit 829c6b24b0
No known key found for this signature in database
GPG key ID: 32BDCED870788F04
8 changed files with 117 additions and 56 deletions

View file

@@ -0,0 +1 @@
DIST cxxfilt-0.3.0.tar.gz 4806 BLAKE2B 369f10c547343dc9e4bf87edd4513531109b1792d5196da57fd0a5db9f0830bf5ada1f20dd7799a3a114400a3f03cbfc795c021d2ae763557e848abca4f7094e SHA512 87cf42c8ae7a2eeb74712b2d952d011ca1e2ac90e3d9c7d254eeef1dd88ceba1f25aff377852d384f73c38cd0183898f3ba9ccde0d9f09d03025c0262290704f

View file

@@ -0,0 +1,24 @@
# Copyright 1999-2022 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

EAPI=8

PYTHON_COMPAT=( python3_{10..11} )

inherit distutils-r1

DESCRIPTION="Python interface to c++filt / abi::__cxa_demangle"
HOMEPAGE="https://github.com/afq984/python-cxxfilt"
SRC_URI="mirror://pypi/${P:0:1}/${PN}/${P}.tar.gz"

LICENSE="BSD-2"
SLOT="0"
KEYWORDS="amd64 ~arm64 x86"
IUSE=""
RESTRICT="test"

RDEPEND=""
DEPEND="${RDEPEND}"
REQUIRED_USE="${PYTHON_REQUIRED_USE}"

#distutils_enable_tests pytest
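For context, the new dev-python/cxxfilt package wraps c++filt / abi::__cxa_demangle so C++ symbol names can be demangled from Python. A minimal usage sketch follows; cxxfilt.demangle() is the entry point documented by upstream, and the mangled name below is only an illustrative input.

# Minimal sketch: demangle a C++ symbol name with python-cxxfilt.
# The mangled string is an illustrative example input.
import cxxfilt

mangled = "_ZNSt6vectorIiSaIiEE9push_backERKi"
print(cxxfilt.demangle(mangled))
# -> std::vector<int, std::allocator<int> >::push_back(int const&)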

View file

@@ -0,0 +1 @@
DIST nvidia-apex-bin-22.8.zip 3247310 BLAKE2B ef1d8ff2933d5b9866b137645769e9f1ccda2091e1dfd73b5a86e6556ef5af927e652e36181b425e2d0406f8dcd51ce0a65be86eb4d2deb278cfaf83e8d63d43 SHA512 0b92db6858f075b007fc46ce794cb015d4ad018e787850d2c25e0669f4f39ef67d1efbf5baa14d139ef747fae94ffbbf1673c62652a038fcde983178701e740d

View file

@@ -0,0 +1,37 @@
# Copyright 1999-2022 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

EAPI=8

PYTHON_COMPAT=( python3_10 )

#inherit distutils-r1
inherit python-any-r1

#22.08 -> 22.8
MY_P="apex-${PV}-py3.10-linux-x86_64.egg"

DESCRIPTION="NVIDIA-maintained utilities to streamline mixed precision and distributed training in Pytorch"
HOMEPAGE="https://github.com/NVIDIA/apex"
SRC_URI="https://dev.pentoo.ch/~blshkv/distfiles/nvidia-${MY_P} -> ${P}.zip"

LICENSE=""
SLOT="0"
#KEYWORDS="amd64 ~arm64 x86"
IUSE=""

#[${PYTHON_USEDEP}]
RDEPEND=">=dev-python/tqdm-4.28.1
	>=dev-python/numpy-1.15.3
	>=dev-python/pyyaml-5.1
"
DEPEND="${RDEPEND}"

RESTRICT="test"

S=${WORKDIR}

src_install() {
	insinto "$(python_get_sitedir)/${MY_P}"
	doins -r "./"
}
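The -bin ebuild above drops the prebuilt egg into the interpreter's site-packages as a plain directory. One WIP caveat: a directory named *.egg is not automatically importable unless something (normally an easy-install.pth entry) puts it on sys.path. A small check like the one below, a sketch only, can confirm the result is usable; the egg path mirrors MY_P from the ebuild and is illustrative.

# Post-install sanity check (illustrative; not part of the ebuild).
# An .egg directory in site-packages is only importable once it is on sys.path.
import importlib.util
import os
import site
import sys

# Mirrors MY_P from the ebuild; adjust for the installed Python/apex version.
egg = os.path.join(site.getsitepackages()[0], "apex-22.8-py3.10-linux-x86_64.egg")

if os.path.isdir(egg) and egg not in sys.path:
    sys.path.insert(0, egg)

print("apex importable:", importlib.util.find_spec("apex") is not None)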

View file

@@ -1,2 +1 @@
DIST nvidia-apex-22.04-gh.tar.gz 769456 BLAKE2B 9c9270e5b19afa244784f0825a2c093820aab5e64f5621ea0350da6b127d14b734fcbf16982ff889a82b624842d982c0f447146a9efddd165dd59916988ee955 SHA512 347a67640dd1903442dacc59cb7c15efe299cafc3d7afd36ae2f1fbe4b7b08d23c18a5351035f62f3b7f47023331d37485a99cc0049ef8c788f27a32146d9e64
DIST nvidia-apex-22.08-gh.tar.gz 746501 BLAKE2B 7d882e551c14b7b3a6b463f7873d22603b55e957ffd792735ab05ba158706c61c492325f87fb5d3370cf96108a7a4ec9546efc93c78a3e96f16470e43b93cee8 SHA512 11291444f5038f6b1702e7bd2c65daa29746ada9add18d1af226ed7d3facd21c4fe4ba2e262936f6a95e3a84f41184edf2a1ff964394fc6eb7b23e76157dcdf5

View file

@@ -0,0 +1,11 @@
--- apex-22.08-dev/apex/normalization/fused_layer_norm.py.orig 2022-08-02 08:25:04.000000000 +0800
+++ apex-22.08-dev/apex/normalization/fused_layer_norm.py 2022-11-21 13:12:25.579343523 +0800
@@ -265,7 +265,7 @@
        super().__init__()
        global fused_layer_norm_cuda
-        fused_layer_norm_cuda = importlib.import_module("fused_layer_norm_cuda")
+        #fused_layer_norm_cuda = importlib.import_module("fused_layer_norm_cuda")
        if isinstance(normalized_shape, numbers.Integral):
            normalized_shape = (normalized_shape,)
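The patch above (referenced further down as ${FILESDIR}/disable_cuda.patch in the commented-out python_prepare) comments out the unconditional import of the compiled fused_layer_norm_cuda module, so apex stays importable when the CUDA extension is not built. A softer alternative, shown here only as a sketch and not what the commit does, would be to guard the import and fall back to None:

# Hypothetical alternative to the patch: tolerate a missing compiled extension
# instead of commenting the import out entirely.
import importlib

try:
    fused_layer_norm_cuda = importlib.import_module("fused_layer_norm_cuda")
except ImportError:
    fused_layer_norm_cuda = None  # CPU-only install; callers must handle None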

View file

@@ -1,37 +0,0 @@
# Copyright 1999-2022 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

EAPI=8

PYTHON_COMPAT=( python3_{10..11} )

inherit distutils-r1

#branch 22.04-dev
HASH_COMMIT="${PV}-dev"

DESCRIPTION="NVIDIA-maintained utilities to streamline mixed precision and distributed training in Pytorch"
HOMEPAGE="https://github.com/NVIDIA/apex"
SRC_URI="https://github.com/NVIDIA/apex/archive/${HASH_COMMIT}.tar.gz -> ${P}-gh.tar.gz"

LICENSE=""
SLOT="0"
KEYWORDS="amd64 ~arm64 x86"
IUSE=""

RDEPEND=">=dev-python/tqdm-4.28.1[${PYTHON_USEDEP}]
	>=dev-python/numpy-1.15.3[${PYTHON_USEDEP}]
	>=dev-python/pyyaml-5.1[${PYTHON_USEDEP}]"
DEPEND="${RDEPEND}"
REQUIRED_USE="${PYTHON_REQUIRED_USE}"

S="${WORKDIR}/apex-${HASH_COMMIT}"

#src_compile(){
# pip install -v --disable-pip-version-check --no-cache-dir ./
#pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
#}

python_compile() {
	distutils-r1_python_compile --cpp_ext --cuda_ext
}

View file

@@ -3,11 +3,12 @@
EAPI=8
DISTUTILS_IN_SOURCE_BUILD=1
PYTHON_COMPAT=( python3_{10..11} )
inherit distutils-r1
#brach 22.04-dev
#git branch
HASH_COMMIT="${PV}-dev"
DESCRIPTION="NVIDIA-maintained utilities to streamline mixed precision and distributed training in Pytorch"
@@ -17,7 +18,9 @@ SRC_URI="https://github.com/NVIDIA/apex/archive/${HASH_COMMIT}.tar.gz -> ${P}-gh
LICENSE=""
SLOT="0"
KEYWORDS="amd64 ~arm64 x86"
IUSE="cuda"
#FIXME: can't use global "cuda"
IUSE="cuda_ext"
RDEPEND=">=dev-python/tqdm-4.28.1[${PYTHON_USEDEP}]
>=dev-python/numpy-1.15.3[${PYTHON_USEDEP}]
@@ -25,27 +28,49 @@ RDEPEND=">=dev-python/tqdm-4.28.1[${PYTHON_USEDEP}]
DEPEND="${RDEPEND}"
REQUIRED_USE="${PYTHON_REQUIRED_USE}"
RESTRICT="test"
S="${WORKDIR}/apex-${HASH_COMMIT}"
#python_prepare() {
# if use !cuda_ext; then
# einfo "CUDA disabled"
#sed -i -e "s|fused_layer_norm_cuda = importlib|\#fused_layer_norm_cuda = importlib|" apex/normalization/fused_layer_norm.py || die
# eapply ${FILESDIR}/disable_cuda.patch
# fi
# eapply_user
#}
#python_prepare_all() {
# export TORCH_CUDA_ARCH_LIST="compute capability"
# python_setup
# esetup.py
# distutils-r1_python_prepare_all
#}
#If you wish to cross-compile for a single specific architecture,
#export TORCH_CUDA_ARCH_LIST="compute capability" before running setup.py.
python_compile() {
	if use cuda; then
		distutils-r1_python_compile --cpp_ext --cuda_ext
python_configure_all() {
# export MAX_JOBS=1
	if use cuda_ext; then
# export TORCH_CUDA_ARCH_LIST="compute capability"
		addpredict "/dev/nvidiactl"
		DISTUTILS_ARGS=( --cpp_ext --cuda_ext )
	fi
}

python_install() {
	if use cuda; then
		# disable gpu check
		# required for cross-compile and sandbox
		export TORCH_CUDA_ARCH_LIST="compute capability"
		distutils-r1_python_install --cpp_ext --cuda_ext
	fi
}
#python_compile_all() {
# export TORCH_CUDA_ARCH_LIST="compute capability"
# esetup.py
#}
#FIXME:
#https://github.com/NVIDIA/apex/issues/161
# "No module named 'fused_layer_norm_cuda'"
#python_compile() {
# export TORCH_CUDA_ARCH_LIST="compute capability"
# breaks with parallel build
# need to avoid dropping .so plugins into
# build-lib, which breaks tests
# esetup.py build_ext --inplace
# --cpp_ext --cuda_ext build_ext --inplace -j1
# TORCH_CUDA_ARCH_LIST="compute capability" distutils-r1_python_compile -j1
# distutils-r1_python_compile -j1
#}