Anton Bolshakov 2023-06-07 11:00:19 +08:00
parent 3c7cf94420
commit b8361a152f
No known key found for this signature in database
GPG key ID: 32BDCED870788F04
9 changed files with 133 additions and 935 deletions

View file

@@ -0,0 +1 @@
DIST bulk_extractor-2.0.3.tar.gz 8456967 BLAKE2B b8184c24dfc1ba9004f44f19a118cb84a9938f1aaf60663fe0bb259045ca4fe86ab339f8a265a71463fdc7e09b7a2f42989990c863f0888f5a2b4f80bd791677 SHA512 e1554f7f9863122ccd7405a5ec713fb3a09eed8e45db4c0c9580e8e914f1a477664683109c2b05e80a5dab169db8aa12ec8d0a49d8a959dc4ab622c11e0612f5

View file

@@ -0,0 +1,116 @@
# Copyright 1999-2023 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
EAPI=7
inherit autotools desktop eutils xdg-utils
DESCRIPTION="Scans a disk image for regular expressions and other content"
HOMEPAGE="https://github.com/simsong/bulk_extractor"
SRC_URI="https://digitalcorpora.s3.amazonaws.com/downloads/bulk_extractor/${P}.tar.gz"
LICENSE="GPL-2"
SLOT="0"
KEYWORDS="amd64 ~x86"
# fails to compile with ewf
# fails to compile with exiv2
# fails to compile without rar
IUSE="aff doc beviewer exiv2 hashdb +rar"
# ewf? ( app-forensics/libewf )
RDEPEND="
aff? ( app-forensics/afflib )
dev-libs/boost
dev-libs/expat
dev-libs/openssl:0=
dev-db/sqlite:3
dev-libs/libxml2
exiv2? ( media-gfx/exiv2 )
sys-libs/zlib
hashdb? ( dev-libs/hashdb )
beviewer? (
|| ( virtual/jre:* virtual/jdk:* )
)"
DEPEND="${RDEPEND}
doc? ( app-doc/doxygen )
virtual/man"
BDEPEND="
sys-devel/flex
virtual/pkgconfig"
src_prepare() {
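# restore the <cstdint> include in be20_api so uint32_t is declared with newer toolchains (patch added by this commit)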
eapply "${FILESDIR}/bulk_extractor-2.0.3_uint32_t.patch"
if [[ ${PV} != *9999 ]]; then
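# keep the version reported by AC_INIT in sync with ${PV} for release tarballs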
sed -e "s/AC_INIT(BULK_EXTRACTOR, \(.*\),/AC_INIT(BULK_EXTRACTOR, ${PV},/" \
-i configure.ac || die
fi
eautoreconf
default
}
src_configure() {
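# --disable-o3 leaves optimization to the user's CFLAGS; libewf stays hard-disabled until it compiles again (see the commented switches below)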
econf \
--disable-o3 \
--disable-libewf
# $(use ewf || echo "--disable-libewf")
# $(use beviewer || echo "--disable-BEViewer") \
# $(use exiv2 && echo "--enable-exiv2") \
# $(use aff || echo "--disable-afflib") \
# $(use hashdb || echo "--disable-hashdb") \
# $(use rar || echo "--disable-rar" )
}
src_install() {
dobin src/${PN}
doman man/*.1
dodoc AUTHORS ChangeLog NEWS README.md
if use doc ; then
pushd doc/doxygen >/dev/null || die
doxygen || die "doxygen failed"
popd >/dev/null || die
dodoc -r \
doc/doxygen/html \
doc/Diagnostics_Notes \
doc/announce \
doc/*.{pdf,txt,md} \
doc/programmer_manual/*.pdf
fi
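# BEViewer install steps are kept for reference; the 2.0.x tarball appears to no longer ship java_gui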
# if use beviewer; then
# local bev_dir="/opt/beviewer-${PV}"
# insinto "${bev_dir}"
# doins java_gui/BEViewer.jar
# insinto /usr/share/pixmaps
# newins java_gui/icons/24/run-build-install.png ${PN}.png
# make_wrapper "beviewer" \
# "/usr/bin/java -Xmx1g -jar \"${bev_dir}/BEViewer.jar\""
# make_desktop_entry \
# "beviewer" \
# "BEViewer (bulk_extractor)" \
# "${PN}" "Utility"
# fi
}
#pkg_postinst() {
# if use beviewer; then
# xdg_icon_cache_update
# xdg_desktop_database_update
# fi
#}
#pkg_postrm() {
# if use beviewer; then
# xdg_icon_cache_update
# xdg_desktop_database_update
# fi
#}

View file

@ -15,6 +15,7 @@ if [[ ${PV} != *9999 ]]; then
#EGIT_COMMIT="8563614408834087f242297813de9f75bdc9bedc"
EGIT_OVERRIDE_COMMIT_SIMSONG_BULK_EXTRACTOR="v2.0.3"
# EGIT_OVERRIDE_COMMIT_SIMSONG_BE20_API="f6d985f4d5f8228c1000c268911ad0cd97daedf1"
# EGIT_OVERRIDE_COMMIT_DFXML_WORKING_GROUP_DFXML_CPP="a283c888b4bb84b3dab937928f9495290a5a8a47"
# EGIT_OVERRIDE_COMMIT_NEMTRIF_UTFCPP="2ad995746bf1731d5e21cde47c9c3deff56bdbc2"
KEYWORDS="amd64 ~x86"

View file

@@ -1,116 +0,0 @@
# Copyright 1999-2022 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
EAPI=7
inherit autotools desktop eutils git-r3 xdg-utils
DESCRIPTION="Scans a disk image for regular expressions and other content"
HOMEPAGE="https://github.com/simsong/bulk_extractor"
# Please check the ".gitmodules" file upstream before bumping this
EGIT_REPO_URI="https://github.com/simsong/bulk_extractor"
if [[ ${PV} != *9999 ]]; then
EGIT_COMMIT="a52b133a3c56a483caa59eb8c68634ee1648c4ec" # 20191111 release
KEYWORDS="~amd64 ~x86"
fi
LICENSE="GPL-2"
SLOT="0"
IUSE="aff doc +beviewer +ewf +exiv2 hashdb rar"
RDEPEND="
aff? ( app-forensics/afflib )
dev-libs/boost
dev-libs/expat
dev-libs/openssl:0=
dev-db/sqlite:3
dev-libs/libxml2
ewf? ( app-forensics/libewf )
exiv2? ( media-gfx/exiv2 )
sys-libs/zlib
hashdb? ( dev-libs/hashdb )
beviewer? (
|| ( virtual/jre:* virtual/jdk:* )
)"
DEPEND="${RDEPEND}
doc? ( app-doc/doxygen )
virtual/man"
BDEPEND="
sys-devel/flex
virtual/pkgconfig"
src_prepare() {
eapply "${FILESDIR}/add_exiv2-0.27_api_support.patch"
if [[ ${PV} != *9999 ]]; then
sed -e "s/AC_INIT(BULK_EXTRACTOR, \(.*\),/AC_INIT(BULK_EXTRACTOR, ${PV},/" \
-i configure.ac || die
fi
eautoreconf
default
}
src_configure() {
econf \
--without-o3 \
$(use aff || echo "--disable-afflib") \
$(use beviewer || echo "--disable-BEViewer") \
$(use ewf || echo "--disable-libewf") \
$(use exiv2 && echo "--enable-exiv2") \
$(use hashdb || echo "--disable-hashdb") \
$(use rar || echo "--disable-rar" )
}
src_install() {
dobin src/${PN} plugins/plugin_test
doman man/*.1
dodoc AUTHORS ChangeLog NEWS README.md
if use doc ; then
pushd doc/doxygen >/dev/null || die
doxygen || die "doxygen failed"
popd >/dev/null || die
dodoc -r \
doc/doxygen/html \
doc/Diagnostics_Notes \
doc/announce \
doc/*.{pdf,txt,md} \
doc/programmer_manual/*.pdf
fi
if use beviewer; then
local bev_dir="/opt/beviewer-${PV}"
insinto "${bev_dir}"
doins java_gui/BEViewer.jar
insinto /usr/share/pixmaps
newins java_gui/icons/24/run-build-install.png ${PN}.png
make_wrapper "beviewer" \
"/usr/bin/java -Xmx1g -jar \"${bev_dir}/BEViewer.jar\""
make_desktop_entry \
"beviewer" \
"BEViewer (bulk_extractor)" \
"${PN}" "Utility"
fi
}
pkg_postinst() {
if use beviewer; then
xdg_icon_cache_update
xdg_desktop_database_update
fi
}
pkg_postrm() {
if use beviewer; then
xdg_icon_cache_update
xdg_desktop_database_update
fi
}

View file

@@ -1,24 +0,0 @@
diff -ur a/src/scan_exiv2.cpp b/src/scan_exiv2.cpp
--- a/src/scan_exiv2.cpp 2014-09-16 22:34:00.000000000 +0400
+++ b/src/scan_exiv2.cpp 2019-03-17 08:38:29.479753464 +0300
@@ -68,7 +68,7 @@
* Used for helping to convert libexiv2's GPS format to decimal lat/long
*/
-static double stod(string s)
+static double sub_stod(string s)
{
double d=0;
sscanf(s.c_str(),"%lf",&d);
@@ -78,9 +78,9 @@
static double rational(string s)
{
std::vector<std::string> parts = split(s,'/');
- if(parts.size()!=2) return stod(s); // no slash, so return without
- double top = stod(parts[0]);
- double bot = stod(parts[1]);
+ if(parts.size()!=2) return sub_stod(s); // no slash, so return without
+ double top = sub_stod(parts[0]);
+ double bot = sub_stod(parts[1]);
return bot>0 ? top / bot : top;
}

View file

@@ -1,733 +0,0 @@
diff -ur a/src/scan_hashdb.cpp b/src/scan_hashdb.cpp
--- a/src/scan_hashdb.cpp 2014-09-16 22:34:00.000000000 +0400
+++ b/src/scan_hashdb.cpp 2019-03-16 14:07:05.887464616 +0300
@@ -31,47 +31,146 @@
#ifdef HAVE_HASHDB
+//#define DEBUG_V2_OUT
+
#include "hashdb.hpp"
#include <dfxml/src/hash_t.h>
#include <iostream>
-#include <unistd.h> // for getpid
-#include <sys/types.h> // for getpid
+#include <cmath>
+#include <unistd.h> // for getpid
+#include <sys/types.h> // for getpid
// user settings
-static std::string hashdb_mode="none"; // import or scan
-static uint32_t hashdb_block_size=4096; // import or scan
-static bool hashdb_ignore_empty_blocks=true; // import or scan
-static std::string hashdb_scan_path_or_socket="your_hashdb_directory"; // scan only
-static size_t hashdb_scan_sector_size = 512; // scan only
-static size_t hashdb_import_sector_size = 4096; // import only
-static std::string hashdb_import_repository_name="default_repository"; // import only
-static uint32_t hashdb_import_max_duplicates=0; // import only
+static std::string hashdb_mode="none"; // import or scan
+static uint32_t hashdb_block_size=512; // import or scan
+static uint32_t hashdb_step_size=512; // import or scan
+static std::string hashdb_scan_path="your_hashdb_directory"; // scan only
+static std::string hashdb_repository_name="default_repository"; // import only
+static uint32_t hashdb_max_feature_file_lines=0; // scan only for feature file
// runtime modes
// scanner mode
enum mode_type_t {MODE_NONE, MODE_SCAN, MODE_IMPORT};
static mode_type_t mode = MODE_NONE;
-// internal helper functions
-static void do_import(const class scanner_params &sp,
- const recursion_control_block &rcb);
-static void do_scan(const class scanner_params &sp,
- const recursion_control_block &rcb);
-inline bool is_empty_block(const uint8_t *buf);
-
// global state
// hashdb directory, import only
static std::string hashdb_dir;
// hash type
-typedef md5_t hash_t;
typedef md5_generator hash_generator;
// hashdb manager
-typedef hashdb_t__<hash_t> hashdb_t;
-hashdb_t* hashdb;
+static hashdb::import_manager_t* import_manager;
+static hashdb::scan_manager_t* scan_manager;
+
+static void do_import(const class scanner_params &sp,
+ const recursion_control_block &rcb);
+static void do_scan(const class scanner_params &sp,
+ const recursion_control_block &rcb);
+
+
+// safely hash sbuf range without overflow failure
+inline const md5_t hash_one_block(const sbuf_t &sbuf)
+{
+ if (sbuf.bufsize >= hashdb_block_size) {
+ // hash from the beginning
+ return hash_generator::hash_buf(sbuf.buf, hashdb_block_size);
+ }
+ // hash the available part and zero-fill
+ hash_generator g;
+ g.update(sbuf.buf, sbuf.bufsize);
+
+ // hash in extra zeros to fill out the block
+ size_t extra = hashdb_block_size - sbuf.bufsize;
+ std::vector<uint8_t> zeros(extra);
+ g.update(&zeros[0], extra);
+ return g.final();
+}
+
+// rules for determining if a block should be ignored
+static bool ramp_trait(const sbuf_t &sbuf)
+{
+ if (sbuf.pagesize < 8) {
+ // not enough to process
+ return false;
+ }
+
+ uint32_t count = 0;
+ for(size_t i=0;i<sbuf.pagesize-8;i+=4){
+ // note that little endian is detected and big endian is not detected
+ if (sbuf.get32u(i)+1 == sbuf.get32u(i+4)) {
+ count += 1;
+ }
+ }
+ return count > sbuf.pagesize/8;
+}
+
+static bool hist_trait(const sbuf_t &sbuf)
+{
+ if (sbuf.pagesize < hashdb_block_size) {
+ // do not perform any histogram analysis on short blocks
+ return false;
+ }
+
+ std::map<uint32_t,uint32_t> hist;
+ for(size_t i=0;i<sbuf.pagesize-4;i+=4){
+ hist[sbuf.get32uBE(i)] += 1;
+ }
+ if (hist.size() < 3) return true;
+ for (std::map<uint32_t,uint32_t>::const_iterator it = hist.begin();it != hist.end(); it++){
+ if ((it->second) > hashdb_block_size/16){
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool whitespace_trait(const sbuf_t &sbuf)
+{
+ size_t count = 0;
+ for(size_t i=0;i<sbuf.pagesize;i++){
+ if (isspace(sbuf[i])) count+=1;
+ }
+ return count >= (sbuf.pagesize * 3)/4;
+}
+
+static bool monotonic_trait(const sbuf_t &sbuf)
+{
+ if (sbuf.pagesize < 16) {
+ // not enough data
+ return false;
+ }
+
+ const double total = sbuf.pagesize / 4.0;
+ int increasing = 0, decreasing = 0, same = 0;
+ for (size_t i=0; i+8<sbuf.pagesize; i+=4) {
+ if (sbuf.get32u(i+4) > sbuf.get32u(i)) {
+ increasing++;
+ } else if (sbuf.get32u(i+4) < sbuf.get32u(i)) {
+ decreasing++;
+ } else {
+ same++;
+ }
+ }
+ if (increasing / total >= 0.75) return true;
+ if (decreasing / total >= 0.75) return true;
+ if (same / total >= 0.75) return true;
+ return false;
+}
+
+// detect if block is all the same
+inline bool empty_sbuf(const sbuf_t &sbuf)
+{
+ for (size_t i=1; i<sbuf.bufsize; i++) {
+ if (sbuf[i] != sbuf[0]) {
+ return false;
+ }
+ }
+ return true; // all the same
+}
extern "C"
void scan_hashdb(const class scanner_params &sp,
@@ -82,9 +181,12 @@
case scanner_params::PHASE_STARTUP: {
// set properties for this scanner
+ std::string desc = "Search cryptographic hash IDs against hashes in a hashdb block hash database";
+ desc += std::string(" (hashdb version") + std::string(hashdb_version()) + std::string(")");
+
sp.info->name = "hashdb";
sp.info->author = "Bruce Allen";
- sp.info->description = "Search cryptographic hash IDs against hashes in a hashdb block hash database";
+ sp.info->description = desc;
sp.info->flags = scanner_info::SCANNER_DISABLED;
// hashdb_mode
@@ -97,60 +199,52 @@
// hashdb_block_size
sp.info->get_config("hashdb_block_size", &hashdb_block_size,
- "Hash block size, in bytes, used to generate hashes");
+ "Selects the block size to hash, in bytes.");
- // hashdb_ignore_empty_blocks
- sp.info->get_config("hashdb_ignore_empty_blocks", &hashdb_ignore_empty_blocks,
- "Selects to ignore empty blocks.");
-
- // hashdb_scan_path_or_socket
- std::stringstream ss_hashdb_scan_path_or_socket;
- ss_hashdb_scan_path_or_socket
- << "File path to a hash database or\n"
- << " socket to a hashdb server to scan against. Valid only in scan mode.";
- sp.info->get_config("hashdb_scan_path_or_socket", &hashdb_scan_path_or_socket,
- ss_hashdb_scan_path_or_socket.str());
-
- // hashdb_scan_sector_size
- std::stringstream ss_hashdb_scan_sector_size;
- ss_hashdb_scan_sector_size
- << "Selects the scan sector size. Scans along\n"
- << " sector boundaries. Valid only in scan mode.";
- sp.info->get_config("hashdb_scan_sector_size", &hashdb_scan_sector_size,
- ss_hashdb_scan_sector_size.str());
-
- // hashdb_import_sector_size
- std::stringstream ss_hashdb_import_sector_size;
- ss_hashdb_import_sector_size
- << "Selects the import sector size. Imports along\n"
- << " sector boundaries. Valid only in import mode.";
- sp.info->get_config("hashdb_import_sector_size", &hashdb_import_sector_size,
- ss_hashdb_import_sector_size.str());
+ // hashdb_step_size
+ std::stringstream ss_hashdb_step_size;
+ ss_hashdb_step_size
+ << "Selects the step size. Scans and imports along\n"
+ << " this step value.";
+ sp.info->get_config("hashdb_step_size", &hashdb_step_size,
+ ss_hashdb_step_size.str());
+
+
+ // hashdb_scan_path
+ std::stringstream ss_hashdb_scan_path;
+ ss_hashdb_scan_path
+ << "File path to a hash database to scan against.\n"
+ << " Valid only in scan mode.";
+ sp.info->get_config("hashdb_scan_path", &hashdb_scan_path,
+ ss_hashdb_scan_path.str());
- // hashdb_import_repository_name
+ // hashdb_repository_name
std::stringstream ss_hashdb_import_repository_name;
ss_hashdb_import_repository_name
<< "Sets the repository name to\n"
<< " attribute the import to. Valid only in import mode.";
- sp.info->get_config("hashdb_import_repository_name",
- &hashdb_import_repository_name,
+ sp.info->get_config("hashdb_repository_name",
+ &hashdb_repository_name,
ss_hashdb_import_repository_name.str());
- // hashdb_import_max_duplicates
- std::stringstream ss_hashdb_import_max_duplicates;
- ss_hashdb_import_max_duplicates
- << "The maximum number of duplicates to import\n"
- << " for a given hash value, or 0 for no limit. Valid only in import mode.";
- sp.info->get_config("hashdb_import_max_duplicates", &hashdb_import_max_duplicates,
- ss_hashdb_import_max_duplicates.str());
-
-
// configure the feature file to accept scan features
// but only if in scan mode
if (hashdb_mode == "scan") {
sp.info->feature_names.insert("identified_blocks");
+#ifdef DEBUG_V2_OUT
+ sp.info->feature_names.insert("identified_blocks2");
+#endif
}
+ // hashdb_max_feature_file_lines
+ std::stringstream ss_hashdb_max_feature_file_lines;
+ ss_hashdb_max_feature_file_lines
+ << "The maximum number of features lines to record\n"
+ << " or 0 for no limit. Valid only in scan mode.";
+ sp.info->get_config("hashdb_max_feature_file_lines", &hashdb_max_feature_file_lines,
+ ss_hashdb_max_feature_file_lines.str());
+
+
return;
}
@@ -168,62 +262,27 @@
} else {
// bad mode
std::cerr << "Error. Parameter 'hashdb_mode' value '"
- << hashdb_mode << "' is invalid.\n"
+ << hashdb_mode << "' must be [none|import|scan].\n"
<< "Cannot continue.\n";
exit(1);
}
- // hashdb_ignore_empty_blocks
- // checks not performed
-
// hashdb_block_size
if (hashdb_block_size == 0) {
std::cerr << "Error. Value for parameter 'hashdb_block_size' is invalid.\n"
- << "Cannot continue.\n";
- exit(1);
- }
-
- // hashdb_scan_path_or_socket
- // checks not performed
-
- // hashdb_scan_sector_size
- if (hashdb_scan_sector_size == 0) {
- std::cerr << "Error. Value for parameter 'hashdb_scan_sector_size' is invalid.\n"
- << "Cannot continue.\n";
- exit(1);
- }
-
- // for valid operation, scan sectors must align on hash block boundaries
- if (mode == MODE_SCAN && hashdb_block_size % hashdb_scan_sector_size != 0) {
- std::cerr << "Error: invalid hashdb block size=" << hashdb_block_size
- << " or hashdb scan sector size=" << hashdb_scan_sector_size << ".\n"
- << "Sectors must align on hash block boundaries.\n"
- << "Specifically, hashdb_block_size \% hashdb_scan_sector_size must be zero.\n"
- << "Cannot continue.\n";
- exit(1);
- }
-
- // hashdb_import_sector_size
- if (hashdb_import_sector_size == 0) {
- std::cerr << "Error. Value for parameter 'hashdb_import_sector_size' is invalid.\n"
<< "Cannot continue.\n";
exit(1);
}
- // for valid operation, import sectors must align on hash block boundaries
- if (mode == MODE_IMPORT && hashdb_block_size % hashdb_import_sector_size != 0) {
- std::cerr << "Error: invalid hashdb block size=" << hashdb_block_size
- << " or hashdb import sector size=" << hashdb_import_sector_size << ".\n"
- << "Sectors must align on hash block boundaries.\n"
- << "Specifically, hashdb_block_size \% hashdb_import_sector_size must be zero.\n"
+ // hashdb_step_size
+ if (hashdb_step_size == 0) {
+ std::cerr << "Error. Value for parameter 'hashdb_step_size' is invalid.\n"
<< "Cannot continue.\n";
exit(1);
}
- // hashdb_import_repository_name
- // checks not performed
- // hashdb_import_max_duplicates
- // checks not performed
+ // indicate hashdb version
+ std::cout << "hashdb: hashdb_version=" << hashdb_version() << "\n";
// perform setup based on mode
switch(mode) {
@@ -231,40 +290,49 @@
// set the path to the hashdb
hashdb_dir = sp.fs.get_outdir() + "/" + "hashdb.hdb";
- // create the new hashdb manager for importing
- // currently, hashdb_dir is required to not exist
- hashdb = new hashdb_t(hashdb_dir,
- hashdb_block_size,
- hashdb_import_max_duplicates);
-
- // show relavent settable options
- std::string temp1((hashdb_ignore_empty_blocks) ? "YES" : "NO");
+ // show relevant settable options
std::cout << "hashdb: hashdb_mode=" << hashdb_mode << "\n"
<< "hashdb: hashdb_block_size=" << hashdb_block_size << "\n"
- << "hashdb: hashdb_ignore_empty_blocks=" << temp1 << "\n"
- << "hashdb: hashdb_import_sector_size= " << hashdb_import_sector_size << "\n"
- << "hashdb: hashdb_import_repository_name= " << hashdb_import_repository_name << "\n"
- << "hashdb: hashdb_import_max_duplicates=" << hashdb_import_max_duplicates << "\n"
+ << "hashdb: hashdb_step_size= " << hashdb_step_size << "\n"
+ << "hashdb: hashdb_repository_name= " << hashdb_repository_name << "\n"
<< "hashdb: Creating hashdb directory " << hashdb_dir << "\n";
+
+ // open hashdb for importing
+ // currently, hashdb_dir is required to not exist
+ hashdb::settings_t settings;
+ settings.block_size = hashdb_block_size;
+ std::string error_message = hashdb::create_hashdb(hashdb_dir, settings, "");
+ if (error_message.size() != 0) {
+ std::cerr << "Error: " << error_message << "\n";
+ exit(1);
+ }
+ import_manager = new hashdb::import_manager_t(hashdb_dir, "");
return;
}
case MODE_SCAN: {
- // show relavent settable options
- std::string temp2((hashdb_ignore_empty_blocks) ? "YES" : "NO");
+ // show relevant settable options
std::cout << "hashdb: hashdb_mode=" << hashdb_mode << "\n"
<< "hashdb: hashdb_block_size=" << hashdb_block_size << "\n"
- << "hashdb: hashdb_ignore_empty_blocks=" << temp2 << "\n"
- << "hashdb: hashdb_scan_path_or_socket=" << hashdb_scan_path_or_socket << "\n"
- << "hashdb: hashdb_scan_sector_size=" << hashdb_scan_sector_size << "\n";
+ << "hashdb: hashdb_step_size= " << hashdb_step_size << "\n"
+ << "hashdb: hashdb_scan_path=" << hashdb_scan_path << "\n"
+ << "hashdb: hashdb_max_feature_file_lines=" << hashdb_max_feature_file_lines
+ << "\n";
+
+ // open hashdb for scanning
+ scan_manager = new hashdb::scan_manager_t(hashdb_scan_path);
+
+ // set the feature recorder to leave context alone but fix invalid utf8
+ sp.fs.get_name("identified_blocks")->set_flag(feature_recorder::FLAG_XML);
+#ifdef DEBUG_V2_OUT
+ sp.fs.get_name("identified_blocks2")->set_flag(feature_recorder::FLAG_XML);
+#endif
- // open the hashdb manager for scanning
- hashdb = new hashdb_t(hashdb_scan_path_or_socket);
return;
}
case MODE_NONE: {
- // show relavent settable options
+ // show relevant settable options
std::cout << "hashdb: hashdb_mode=" << hashdb_mode << "\n"
<< "WARNING: the hashdb scanner is enabled but it will not perform any action\n"
<< "because no mode has been selected. Please either select a hashdb mode or\n"
@@ -285,7 +353,7 @@
case scanner_params::PHASE_SCAN: {
switch(mode) {
case MODE_IMPORT:
- do_import(sp, rcb);
+ do_import(sp, rcb);
return;
case MODE_SCAN:
@@ -301,14 +369,17 @@
// shutdown
case scanner_params::PHASE_SHUTDOWN: {
switch(mode) {
- case MODE_SCAN:
- delete hashdb;
- return;
case MODE_IMPORT:
- delete hashdb;
- return;
+ delete import_manager;
+ return;
+
+ case MODE_SCAN:
+ delete scan_manager;
+ return;
default:
- return;
+ // the user should have just left the scanner disabled.
+ // no action.
+ return;
}
}
@@ -327,170 +398,154 @@
// get the sbuf
const sbuf_t& sbuf = sp.sbuf;
- // there should be at least one block to process
- if (sbuf.pagesize < hashdb_block_size) {
- return;
- }
-
- // get count of blocks to process
- size_t count = sbuf.bufsize / hashdb_import_sector_size;
- while ((count * hashdb_import_sector_size) +
- (hashdb_block_size - hashdb_import_sector_size) > sbuf.pagesize) {
- --count;
- }
-
- // allocate space on heap for import_input
- std::vector<hashdb_t::import_element_t>* import_input =
- new std::vector<hashdb_t::import_element_t>;
+ // get the filename from sbuf without the sbuf map file delimiter
+ std::string path_without_map_file_delimiter =
+ (sbuf.pos0.path.size() > 4) ?
+ std::string(sbuf.pos0.path, 0, sbuf.pos0.path.size() - 4) : "";
+
+ // get the filename to use as the source filename
+ std::stringstream ss;
+ const size_t p=sbuf.pos0.path.find('/');
+ if (p==std::string::npos) {
+ // no directory in forensic path so explicitly include the filename
+ ss << sp.fs.get_input_fname();
+ if (sbuf.pos0.isRecursive()) {
+ // forensic path is recursive so add "/" + forensic path
+ ss << "/" << path_without_map_file_delimiter;
+ }
+ } else {
+ // directory in forensic path so print forensic path as is
+ ss << path_without_map_file_delimiter;
+ }
+ std::string source_filename = ss.str();
+
+ // calculate the file hash using the sbuf page
+ const md5_t sbuf_hash = hash_generator::hash_buf(sbuf.buf, sbuf.pagesize);
+ const std::string file_binary_hash =
+ std::string(reinterpret_cast<const char*>(sbuf_hash.digest), 16);
+
+ // track count values
+ size_t zero_count = 0;
+ size_t nonprobative_count = 0;
- // import all the cryptograph hash values from all the blocks in sbuf
- for (size_t i=0; i < count; ++i) {
+ // import the cryptograph hash values from all the blocks in sbuf
+ for (size_t offset=0; offset<sbuf.pagesize; offset+=hashdb_step_size) {
- // calculate the offset associated with this index
- size_t offset = i * hashdb_import_sector_size;
+ // Create a child sbuf of what we would hash
+ const sbuf_t sbuf_to_hash(sbuf,offset,hashdb_block_size);
// ignore empty blocks
- if (hashdb_ignore_empty_blocks && is_empty_block(sbuf.buf + offset)) {
+ if (empty_sbuf(sbuf_to_hash)){
+ ++zero_count;
continue;
}
- // calculate the hash for this sector-aligned hash block
- hash_t hash = hash_generator::hash_buf(
- sbuf.buf + offset,
- hashdb_block_size);
-
- // compose the filename based on the forensic path
- std::stringstream ss;
- size_t p=sbuf.pos0.path.find('/');
- if (p==std::string::npos) {
- // no directory in forensic path so explicitly include the filename
- ss << sp.fs.get_input_fname();
- if (sbuf.pos0.isRecursive()) {
- // forensic path is recursive so add "/" + forensic path
- ss << "/" << sbuf.pos0.path;
- }
- } else {
- // directory in forensic path so print forensic path as is
- ss << sbuf.pos0.path;
+ // calculate the hash for this import-sector-aligned hash block
+ const md5_t hash = hash_one_block(sbuf_to_hash);
+ const std::string binary_hash(reinterpret_cast<const char*>(hash.digest), 16);
+
+ // put together any block classification labels
+ // set flags based on specific tests on the block
+ // Construct an sbuf from the block and subject it to the other tests
+ const sbuf_t s(sbuf, offset, hashdb_block_size);
+ std::stringstream ss_flags;
+ if (ramp_trait(s)) ss_flags << "R";
+ if (hist_trait(s)) ss_flags << "H";
+ if (whitespace_trait(s)) ss_flags << "W";
+ if (monotonic_trait(s)) ss_flags << "M";
+
+ // NOTE: shannon16 is Disabled because its results were not useful
+ // and because it needs fixed to not generate sbuf read exception.
+ //if (ss_flags.str().size() > 0) ss_flags << "," << shannon16(s);
+
+ // flags means nonprobative
+ if (ss_flags.str().size() > 0) {
+ ++nonprobative_count;
}
- // calculate the offset from the start of the media image
- uint64_t image_offset = sbuf.pos0.offset + offset;
-
- // create and add the import element to the import input
- import_input->push_back(hashdb_t::import_element_t(
- hash,
- hashdb_import_repository_name,
- ss.str(),
- image_offset));
- }
-
- // perform the import
- int status = hashdb->import(*import_input);
-
- if (status != 0) {
- std::cerr << "scan_hashdb import failure\n";
- }
-
- // clean up
- delete import_input;
+ // import the hash
+ import_manager->insert_hash(binary_hash,
+ 0, // entropy
+ ss_flags.str(),
+ file_binary_hash);
+ }
+
+ // insert the source name pair
+ import_manager->insert_source_name(file_binary_hash,
+ hashdb_repository_name, source_filename);
+
+ // insert the source data
+ import_manager->insert_source_data(file_binary_hash,
+ sbuf.pagesize,
+ "", // file type
+ zero_count,
+ nonprobative_count);
}
// perform scan
static void do_scan(const class scanner_params &sp,
const recursion_control_block &rcb) {
+ // get the feature recorder
+ feature_recorder* identified_blocks_recorder = sp.fs.get_name("identified_blocks");
+#ifdef DEBUG_V2_OUT
+ feature_recorder* identified_blocks_recorder2 = sp.fs.get_name("identified_blocks2");
+#endif
+
// get the sbuf
const sbuf_t& sbuf = sp.sbuf;
- // there should be at least one block to process
- if (sbuf.pagesize < hashdb_block_size) {
- return;
- }
+ // process cryptographic hash values for blocks along sector boundaries
+ for (size_t offset=0; offset<sbuf.pagesize; offset+=hashdb_step_size) {
- // get count of blocks to process
- size_t count = sbuf.bufsize / hashdb_scan_sector_size;
- while ((count * hashdb_scan_sector_size) +
- (hashdb_block_size - hashdb_scan_sector_size) > sbuf.pagesize) {
- --count;
- }
-
- // allocate space on heap for scan_input
- std::vector<hash_t>* scan_input = new std::vector<hash_t>;
-
- // allocate space on heap for the offset lookup table
- std::vector<uint32_t>* offset_lookup_table = new std::vector<uint32_t>;
-
- // get the cryptograph hash values of all the blocks along
- // sector boundaries from sbuf
- for (size_t i=0; i<count; ++i) {
+ // stop recording if feature file line count is at requested max
+ if (hashdb_max_feature_file_lines > 0 && identified_blocks_recorder->count() >=
+ hashdb_max_feature_file_lines) {
+ break;
+ }
- // calculate the offset associated with this index
- size_t offset = i * hashdb_scan_sector_size;
+ // Create a child sbuf of the block
+ const sbuf_t sbuf_to_hash(sbuf, offset, hashdb_block_size);
// ignore empty blocks
- if (hashdb_ignore_empty_blocks && is_empty_block(sbuf.buf + offset)) {
+ if (empty_sbuf(sbuf_to_hash)){
continue;
}
- // add the offset to the offset lookup table
- offset_lookup_table->push_back(offset);
-
- // calculate and add the hash to the scan input
- scan_input->push_back(hash_generator::hash_buf(
- sbuf.buf + offset, hashdb_block_size));
- }
-
- // allocate space on heap for scan_output
- hashdb_t::scan_output_t* scan_output = new hashdb_t::scan_output_t;
-
- // perform the scan
- int status = hashdb->scan(*scan_input, *scan_output);
-
- if (status != 0) {
- std::cerr << "Error: scan_hashdb scan failure. Aborting.\n";
- exit(1);
- }
-
- // get the feature recorder
- feature_recorder* identified_blocks_recorder = sp.fs.get_name("identified_blocks");
+ // calculate the hash for this sector-aligned hash block
+ const md5_t hash = hash_one_block(sbuf_to_hash);
+ const std::string binary_hash =
+ std::string(reinterpret_cast<const char*>(hash.digest), 16);
+
+ // scan for the hash
+ std::string json_text = scan_manager->find_hash_json(
+ hashdb::scan_mode_t::EXPANDED_OPTIMIZED, binary_hash);
+
+ if (json_text.size() == 0) {
+ // hash not found
+ continue;
+ }
- // record each feature returned in the response
- for (hashdb_t::scan_output_t::const_iterator it=scan_output->begin(); it!= scan_output->end(); ++it) {
+ // prepare fields to record the feature
- // prepare forensic path (pos0, feature, context)
- // as (pos0, hash_string, count_string)
+ // get hash_string from hash
+ std::string hash_string = hash.hexdigest();
- // pos0
- pos0_t pos0 = sbuf.pos0 + offset_lookup_table->at(it->first);
+ // record the feature, there is no context field
+ identified_blocks_recorder->write(sbuf.pos0+offset, hash_string, json_text);
- // hash_string
- std::string hash_string = scan_input->at(it->first).hexdigest();
+#ifdef DEBUG_V2_OUT
+ size_t count = scan_manager->find_hash_count(binary_hash);
- // count
+ // build context field
std::stringstream ss;
- ss << it->second;
- std::string count_string = ss.str();
+ ss << "{\"count\":" << count << "}";
// record the feature
- identified_blocks_recorder->write(pos0, hash_string, count_string);
- }
-
- // clean up
- delete scan_input;
- delete offset_lookup_table;
- delete scan_output;
-}
+ identified_blocks_recorder2->write(sbuf.pos0+offset, hash_string, ss.str());
+#endif
-// detect if block is empty
-inline bool is_empty_block(const uint8_t *buf) {
- for (size_t i=1; i<hashdb_block_size; i++) {
- if (buf[i] != buf[0]) {
- return false;
- }
}
- return true;
}
#endif
-

View file

@@ -1,60 +0,0 @@
diff -ur a/configure.ac b/configure.ac
--- a/configure.ac 2014-09-16 23:08:06.000000000 +0400
+++ b/configure.ac 2019-03-17 10:08:12.594871130 +0300
@@ -150,7 +150,7 @@
AC_ARG_ENABLE([flexscanners],
AS_HELP_STRING([--disable-flexscanners], [disable FLEX-based scanners]),
[],
- [AC_DEFINE(FLEXSCANNERS_ENABLED, 1, [Use FLEX-based scanners]), flexscanners='yes'])
+ [AC_DEFINE(FLEXSCANNERS_ENABLED, 1, [Use FLEX-based scanners]) flexscanners='yes'])
AM_CONDITIONAL([FLEXSCANNERS_ENABLED], [test "yes" = "$flexscanners"])
diff -ur a/m4/ax_boost_base.m4 b/m4/ax_boost_base.m4
--- a/m4/ax_boost_base.m4 2014-09-16 22:34:00.000000000 +0400
+++ b/m4/ax_boost_base.m4 2019-03-17 10:12:31.849532373 +0300
@@ -107,7 +107,7 @@
dnl this location ist chosen if boost libraries are installed with the --layout=system option
dnl or if you install boost with RPM
if test "$ac_boost_path" != ""; then
- BOOST_CPPFLAGS="-isystem$ac_boost_path/include"
+ BOOST_CPPFLAGS="-I$ac_boost_path/include"
for ac_boost_path_tmp in $libsubdirs; do
if test -d "$ac_boost_path"/"$ac_boost_path_tmp" ; then
BOOST_LDFLAGS="-L$ac_boost_path/$ac_boost_path_tmp"
@@ -126,7 +126,7 @@
if ls "$ac_boost_path_tmp/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi
done
BOOST_LDFLAGS="-L$ac_boost_path_tmp/$libsubdir"
- BOOST_CPPFLAGS="-isystem$ac_boost_path_tmp/include"
+ BOOST_CPPFLAGS="-I$ac_boost_path_tmp/include"
break;
fi
done
@@ -179,7 +179,7 @@
_version=$_version_tmp
fi
VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'`
- BOOST_CPPFLAGS="-isystem$ac_boost_path/include/boost-$VERSION_UNDERSCORE"
+ BOOST_CPPFLAGS="-I$ac_boost_path/include/boost-$VERSION_UNDERSCORE"
done
fi
else
@@ -202,7 +202,7 @@
done
VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'`
- BOOST_CPPFLAGS="-isystem$best_path/include/boost-$VERSION_UNDERSCORE"
+ BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE"
if test "$ac_boost_lib_path" = ""; then
for libsubdir in $libsubdirs ; do
if ls "$best_path/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi
@@ -221,7 +221,7 @@
V_CHECK=`expr $stage_version_shorten \>\= $_version`
if test "$V_CHECK" = "1" -a "$ac_boost_lib_path" = "" ; then
AC_MSG_NOTICE(We will use a staged boost library from $BOOST_ROOT)
- BOOST_CPPFLAGS="-isystem$BOOST_ROOT"
+ BOOST_CPPFLAGS="-I$BOOST_ROOT"
BOOST_LDFLAGS="-L$BOOST_ROOT/stage/$libsubdir"
fi
fi

View file

@@ -0,0 +1,12 @@
diff --git a/src/be20_api/unicode_escape.h b/src/be20_api/unicode_escape.h
--- a/src/be20_api/unicode_escape.h
+++ b/src/be20_api/unicode_escape.h
@@ -8,7 +8,7 @@
#define UNICODE_ESCAPE_H
#include <codecvt>
-//#include <cstdint>
+#include <cstdint>
#include <cstring>
#include <cwctype>
#include <iostream>

View file

@@ -1,4 +1,4 @@
-# Copyright 1999-2020 Gentoo Authors
+# Copyright 1999-2023 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
EAPI=7
@@ -11,12 +11,13 @@ DESCRIPTION="The hashdb block hash database tool and API"
HOMEPAGE="https://github.com/NPS-DEEP/hashdb"
SRC_URI="https://github.com/NPS-DEEP/hashdb/archive/v${PV}.tar.gz -> ${P}.tar.gz"
KEYWORDS="~amd64 ~hppa ~ppc ~s390 ~sparc ~x86 ~amd64-linux ~x86-linux ~ppc-macos"
KEYWORDS="amd64 ~arm64 ~x86"
LICENSE="GPL-3 public-domain"
SLOT="0"
IUSE="python static-libs test"
REQUIRED_USE="python? ( ${PYTHON_REQUIRED_USE} )"
RESTRICT="!test? ( test )"
RDEPEND="
app-forensics/libewf