From b8361a152fa50a4b96a9b34ed0832a22c834f689 Mon Sep 17 00:00:00 2001 From: Anton Bolshakov Date: Wed, 7 Jun 2023 11:00:19 +0800 Subject: [PATCH] bulk_extractor: fix https://github.com/pentoo/pentoo-overlay/issues/1519 --- app-forensics/bulk_extractor/Manifest | 1 + .../bulk_extractor-2.0.3-r1.ebuild | 116 +++ .../bulk_extractor-2.0.3.ebuild | 1 + .../bulk_extractor/bulk_extractor-9999.ebuild | 116 --- ...-1.5.5_fix_call_of_overloaded_errors.patch | 24 - ..._hashdb-3.1.0_and_old_bulk_extractor.patch | 733 ------------------ ...lk_extractor-1.5.5_other_minor_fixes.patch | 60 -- .../files/bulk_extractor-2.0.3_uint32_t.patch | 12 + dev-libs/hashdb/hashdb-3.1.0-r1.ebuild | 5 +- 9 files changed, 133 insertions(+), 935 deletions(-) create mode 100644 app-forensics/bulk_extractor/Manifest create mode 100644 app-forensics/bulk_extractor/bulk_extractor-2.0.3-r1.ebuild delete mode 100644 app-forensics/bulk_extractor/bulk_extractor-9999.ebuild delete mode 100644 app-forensics/bulk_extractor/files/bulk_extractor-1.5.5_fix_call_of_overloaded_errors.patch delete mode 100644 app-forensics/bulk_extractor/files/bulk_extractor-1.5.5_hashdb-3.1.0_and_old_bulk_extractor.patch delete mode 100644 app-forensics/bulk_extractor/files/bulk_extractor-1.5.5_other_minor_fixes.patch create mode 100644 app-forensics/bulk_extractor/files/bulk_extractor-2.0.3_uint32_t.patch diff --git a/app-forensics/bulk_extractor/Manifest b/app-forensics/bulk_extractor/Manifest new file mode 100644 index 000000000..b13599633 --- /dev/null +++ b/app-forensics/bulk_extractor/Manifest @@ -0,0 +1 @@ +DIST bulk_extractor-2.0.3.tar.gz 8456967 BLAKE2B b8184c24dfc1ba9004f44f19a118cb84a9938f1aaf60663fe0bb259045ca4fe86ab339f8a265a71463fdc7e09b7a2f42989990c863f0888f5a2b4f80bd791677 SHA512 e1554f7f9863122ccd7405a5ec713fb3a09eed8e45db4c0c9580e8e914f1a477664683109c2b05e80a5dab169db8aa12ec8d0a49d8a959dc4ab622c11e0612f5 diff --git a/app-forensics/bulk_extractor/bulk_extractor-2.0.3-r1.ebuild b/app-forensics/bulk_extractor/bulk_extractor-2.0.3-r1.ebuild new file mode 100644 index 000000000..eb97918d6 --- /dev/null +++ b/app-forensics/bulk_extractor/bulk_extractor-2.0.3-r1.ebuild @@ -0,0 +1,116 @@ +# Copyright 1999-2023 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=7 + +inherit autotools desktop eutils xdg-utils + +DESCRIPTION="Scans a disk image for regular expressions and other content" +HOMEPAGE="https://github.com/simsong/bulk_extractor" +SRC_URI="https://digitalcorpora.s3.amazonaws.com/downloads/bulk_extractor/${P}.tar.gz" + +LICENSE="GPL-2" +SLOT="0" +KEYWORDS="amd64 ~x86" + +#fails to compile with ewf +#fails to compile with exiv2 +#fails to compile without rar +IUSE="aff doc beviewer exiv2 hashdb +rar" + +# ewf? ( app-forensics/libewf ) +RDEPEND=" + aff? ( app-forensics/afflib ) + dev-libs/boost + dev-libs/expat + dev-libs/openssl:0= + dev-db/sqlite:3 + dev-libs/libxml2 + exiv2? ( media-gfx/exiv2 ) + sys-libs/zlib + hashdb? ( dev-libs/hashdb ) + beviewer? ( + || ( virtual/jre:* virtual/jdk:* ) + )" + +DEPEND="${RDEPEND} + doc? 
( app-doc/doxygen ) + virtual/man" + +BDEPEND=" + sys-devel/flex + virtual/pkgconfig" + +src_prepare() { + eapply "${FILESDIR}/bulk_extractor-2.0.3_uint32_t.patch" + + if [[ ${PV} != *9999 ]]; then + sed -e "s/AC_INIT(BULK_EXTRACTOR, \(.*\),/AC_INIT(BULK_EXTRACTOR, ${PV},/" \ + -i configure.ac || die + fi + + eautoreconf + default +} + +src_configure() { + econf \ + --disable-o3 \ + --disable-libewf +# $(use ewf || echo "--disable-libewf") +# $(use beviewer || echo "--disable-BEViewer") \ +# $(use exiv2 && echo "--enable-exiv2") \ +# $(use aff || echo "--disable-afflib") \ +# $(use hashdb || echo "--disable-hashdb") \ +# $(use rar || echo "--disable-rar" ) +} + +src_install() { + dobin src/${PN} + doman man/*.1 + dodoc AUTHORS ChangeLog NEWS README.md + + if use doc ; then + pushd doc/doxygen >/dev/null || die + doxygen || die "doxygen failed" + popd >/dev/null || die + + dodoc -r \ + doc/doxygen/html \ + doc/Diagnostics_Notes \ + doc/announce \ + doc/*.{pdf,txt,md} \ + doc/programmer_manual/*.pdf + fi + +# if use beviewer; then +# local bev_dir="/opt/beviewer-${PV}" + +# insinto "${bev_dir}" +# doins java_gui/BEViewer.jar + +# insinto /usr/share/pixmaps +# newins java_gui/icons/24/run-build-install.png ${PN}.png + +# make_wrapper "beviewer" \ +# "/usr/bin/java -Xmx1g -jar \"${bev_dir}/BEViewer.jar\"" +# make_desktop_entry \ +# "beviewer" \ +# "BEViewer (bulk_extractor)" \ +# "${PN}" "Utility" +# fi +} + +#pkg_postinst() { +# if use beviewer; then +# xdg_icon_cache_update +# xdg_desktop_database_update +# fi +#} + +#pkg_postrm() { +# if use beviewer; then +# xdg_icon_cache_update +# xdg_desktop_database_update +# fi +#} diff --git a/app-forensics/bulk_extractor/bulk_extractor-2.0.3.ebuild b/app-forensics/bulk_extractor/bulk_extractor-2.0.3.ebuild index df4511323..9b3bd9f29 100644 --- a/app-forensics/bulk_extractor/bulk_extractor-2.0.3.ebuild +++ b/app-forensics/bulk_extractor/bulk_extractor-2.0.3.ebuild @@ -15,6 +15,7 @@ if [[ ${PV} != *9999 ]]; then #EGIT_COMMIT="8563614408834087f242297813de9f75bdc9bedc" EGIT_OVERRIDE_COMMIT_SIMSONG_BULK_EXTRACTOR="v2.0.3" # EGIT_OVERRIDE_COMMIT_SIMSONG_BE20_API="f6d985f4d5f8228c1000c268911ad0cd97daedf1" +# EGIT_OVERRIDE_COMMIT_SIMSONG_BE20_API="f6d985f4d5f8228c1000c268911ad0cd97daedf1" # EGIT_OVERRIDE_COMMIT_DFXML_WORKING_GROUP_DFXML_CPP="a283c888b4bb84b3dab937928f9495290a5a8a47" # EGIT_OVERRIDE_COMMIT_NEMTRIF_UTFCPP="2ad995746bf1731d5e21cde47c9c3deff56bdbc2" KEYWORDS="amd64 ~x86" diff --git a/app-forensics/bulk_extractor/bulk_extractor-9999.ebuild b/app-forensics/bulk_extractor/bulk_extractor-9999.ebuild deleted file mode 100644 index 6e6785998..000000000 --- a/app-forensics/bulk_extractor/bulk_extractor-9999.ebuild +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright 1999-2022 Gentoo Authors -# Distributed under the terms of the GNU General Public License v2 - -EAPI=7 - -inherit autotools desktop eutils git-r3 xdg-utils - -DESCRIPTION="Scans a disk image for regular expressions and other content" -HOMEPAGE="https://github.com/simsong/bulk_extractor" - -# Please check a ".gitmodules" file on upstream before bump it -EGIT_REPO_URI="https://github.com/simsong/bulk_extractor" -if [[ ${PV} != *9999 ]]; then - EGIT_COMMIT="a52b133a3c56a483caa59eb8c68634ee1648c4ec" # 20191111 release - KEYWORDS="~amd64 ~x86" -fi - -LICENSE="GPL-2" -SLOT="0" -IUSE="aff doc +beviewer +ewf +exiv2 hashdb rar" - -RDEPEND=" - aff? ( app-forensics/afflib ) - dev-libs/boost - dev-libs/expat - dev-libs/openssl:0= - dev-db/sqlite:3 - dev-libs/libxml2 - ewf? ( app-forensics/libewf ) - exiv2? 
( media-gfx/exiv2 ) - sys-libs/zlib - hashdb? ( dev-libs/hashdb ) - beviewer? ( - || ( virtual/jre:* virtual/jdk:* ) - )" - -DEPEND="${RDEPEND} - doc? ( app-doc/doxygen ) - virtual/man" - -BDEPEND=" - sys-devel/flex - virtual/pkgconfig" - -src_prepare() { - eapply "${FILESDIR}/add_exiv2-0.27_api_support.patch" - - if [[ ${PV} != *9999 ]]; then - sed -e "s/AC_INIT(BULK_EXTRACTOR, \(.*\),/AC_INIT(BULK_EXTRACTOR, ${PV},/" \ - -i configure.ac || die - fi - - eautoreconf - default -} - -src_configure() { - econf \ - --without-o3 \ - $(use aff || echo "--disable-afflib") \ - $(use beviewer || echo "--disable-BEViewer") \ - $(use ewf || echo "--disable-libewf") \ - $(use exiv2 && echo "--enable-exiv2") \ - $(use hashdb || echo "--disable-hashdb") \ - $(use rar || echo "--disable-rar" ) -} - -src_install() { - dobin src/${PN} plugins/plugin_test - doman man/*.1 - dodoc AUTHORS ChangeLog NEWS README.md - - if use doc ; then - pushd doc/doxygen >/dev/null || die - doxygen || die "doxygen failed" - popd >/dev/null || die - - dodoc -r \ - doc/doxygen/html \ - doc/Diagnostics_Notes \ - doc/announce \ - doc/*.{pdf,txt,md} \ - doc/programmer_manual/*.pdf - fi - - if use beviewer; then - local bev_dir="/opt/beviewer-${PV}" - - insinto "${bev_dir}" - doins java_gui/BEViewer.jar - - insinto /usr/share/pixmaps - newins java_gui/icons/24/run-build-install.png ${PN}.png - - make_wrapper "beviewer" \ - "/usr/bin/java -Xmx1g -jar \"${bev_dir}/BEViewer.jar\"" - make_desktop_entry \ - "beviewer" \ - "BEViewer (bulk_extractor)" \ - "${PN}" "Utility" - fi -} - -pkg_postinst() { - if use beviewer; then - xdg_icon_cache_update - xdg_desktop_database_update - fi -} - -pkg_postrm() { - if use beviewer; then - xdg_icon_cache_update - xdg_desktop_database_update - fi -} diff --git a/app-forensics/bulk_extractor/files/bulk_extractor-1.5.5_fix_call_of_overloaded_errors.patch b/app-forensics/bulk_extractor/files/bulk_extractor-1.5.5_fix_call_of_overloaded_errors.patch deleted file mode 100644 index d87d838d7..000000000 --- a/app-forensics/bulk_extractor/files/bulk_extractor-1.5.5_fix_call_of_overloaded_errors.patch +++ /dev/null @@ -1,24 +0,0 @@ -diff -ur a/src/scan_exiv2.cpp b/src/scan_exiv2.cpp ---- a/src/scan_exiv2.cpp 2014-09-16 22:34:00.000000000 +0400 -+++ b/src/scan_exiv2.cpp 2019-03-17 08:38:29.479753464 +0300 -@@ -68,7 +68,7 @@ - * Used for helping to convert libexiv2's GPS format to decimal lat/long - */ - --static double stod(string s) -+static double sub_stod(string s) - { - double d=0; - sscanf(s.c_str(),"%lf",&d); -@@ -78,9 +78,9 @@ - static double rational(string s) - { - std::vector parts = split(s,'/'); -- if(parts.size()!=2) return stod(s); // no slash, so return without -- double top = stod(parts[0]); -- double bot = stod(parts[1]); -+ if(parts.size()!=2) return sub_stod(s); // no slash, so return without -+ double top = sub_stod(parts[0]); -+ double bot = sub_stod(parts[1]); - return bot>0 ? 
top / bot : top; - } diff --git a/app-forensics/bulk_extractor/files/bulk_extractor-1.5.5_hashdb-3.1.0_and_old_bulk_extractor.patch b/app-forensics/bulk_extractor/files/bulk_extractor-1.5.5_hashdb-3.1.0_and_old_bulk_extractor.patch deleted file mode 100644 index e38721397..000000000 --- a/app-forensics/bulk_extractor/files/bulk_extractor-1.5.5_hashdb-3.1.0_and_old_bulk_extractor.patch +++ /dev/null @@ -1,733 +0,0 @@ -diff -ur a/src/scan_hashdb.cpp b/src/scan_hashdb.cpp ---- a/src/scan_hashdb.cpp 2014-09-16 22:34:00.000000000 +0400 -+++ b/src/scan_hashdb.cpp 2019-03-16 14:07:05.887464616 +0300 -@@ -31,47 +31,146 @@ - - #ifdef HAVE_HASHDB - -+//#define DEBUG_V2_OUT -+ - #include "hashdb.hpp" - #include - - #include --#include // for getpid --#include // for getpid -+#include -+#include // for getpid -+#include // for getpid - - // user settings --static std::string hashdb_mode="none"; // import or scan --static uint32_t hashdb_block_size=4096; // import or scan --static bool hashdb_ignore_empty_blocks=true; // import or scan --static std::string hashdb_scan_path_or_socket="your_hashdb_directory"; // scan only --static size_t hashdb_scan_sector_size = 512; // scan only --static size_t hashdb_import_sector_size = 4096; // import only --static std::string hashdb_import_repository_name="default_repository"; // import only --static uint32_t hashdb_import_max_duplicates=0; // import only -+static std::string hashdb_mode="none"; // import or scan -+static uint32_t hashdb_block_size=512; // import or scan -+static uint32_t hashdb_step_size=512; // import or scan -+static std::string hashdb_scan_path="your_hashdb_directory"; // scan only -+static std::string hashdb_repository_name="default_repository"; // import only -+static uint32_t hashdb_max_feature_file_lines=0; // scan only for feature file - - // runtime modes - // scanner mode - enum mode_type_t {MODE_NONE, MODE_SCAN, MODE_IMPORT}; - static mode_type_t mode = MODE_NONE; - --// internal helper functions --static void do_import(const class scanner_params &sp, -- const recursion_control_block &rcb); --static void do_scan(const class scanner_params &sp, -- const recursion_control_block &rcb); --inline bool is_empty_block(const uint8_t *buf); -- - // global state - - // hashdb directory, import only - static std::string hashdb_dir; - - // hash type --typedef md5_t hash_t; - typedef md5_generator hash_generator; - - // hashdb manager --typedef hashdb_t__ hashdb_t; --hashdb_t* hashdb; -+static hashdb::import_manager_t* import_manager; -+static hashdb::scan_manager_t* scan_manager; -+ -+static void do_import(const class scanner_params &sp, -+ const recursion_control_block &rcb); -+static void do_scan(const class scanner_params &sp, -+ const recursion_control_block &rcb); -+ -+ -+// safely hash sbuf range without overflow failure -+inline const md5_t hash_one_block(const sbuf_t &sbuf) -+{ -+ if (sbuf.bufsize >= hashdb_block_size) { -+ // hash from the beginning -+ return hash_generator::hash_buf(sbuf.buf, hashdb_block_size); -+ } -+ // hash the available part and zero-fill -+ hash_generator g; -+ g.update(sbuf.buf, sbuf.bufsize); -+ -+ // hash in extra zeros to fill out the block -+ size_t extra = hashdb_block_size - sbuf.bufsize; -+ std::vector zeros(extra); -+ g.update(&zeros[0], extra); -+ return g.final(); -+} -+ -+// rules for determining if a block should be ignored -+static bool ramp_trait(const sbuf_t &sbuf) -+{ -+ if (sbuf.pagesize < 8) { -+ // not enough to process -+ return false; -+ } -+ -+ uint32_t count = 0; -+ for(size_t i=0;i 
sbuf.pagesize/8; -+} -+ -+static bool hist_trait(const sbuf_t &sbuf) -+{ -+ if (sbuf.pagesize < hashdb_block_size) { -+ // do not perform any histogram analysis on short blocks -+ return false; -+ } -+ -+ std::map hist; -+ for(size_t i=0;i::const_iterator it = hist.begin();it != hist.end(); it++){ -+ if ((it->second) > hashdb_block_size/16){ -+ return true; -+ } -+ } -+ return false; -+} -+ -+static bool whitespace_trait(const sbuf_t &sbuf) -+{ -+ size_t count = 0; -+ for(size_t i=0;i= (sbuf.pagesize * 3)/4; -+} -+ -+static bool monotonic_trait(const sbuf_t &sbuf) -+{ -+ if (sbuf.pagesize < 16) { -+ // not enough data -+ return false; -+ } -+ -+ const double total = sbuf.pagesize / 4.0; -+ int increasing = 0, decreasing = 0, same = 0; -+ for (size_t i=0; i+8 sbuf.get32u(i)) { -+ increasing++; -+ } else if (sbuf.get32u(i+4) < sbuf.get32u(i)) { -+ decreasing++; -+ } else { -+ same++; -+ } -+ } -+ if (increasing / total >= 0.75) return true; -+ if (decreasing / total >= 0.75) return true; -+ if (same / total >= 0.75) return true; -+ return false; -+} -+ -+// detect if block is all the same -+inline bool empty_sbuf(const sbuf_t &sbuf) -+{ -+ for (size_t i=1; iname = "hashdb"; - sp.info->author = "Bruce Allen"; -- sp.info->description = "Search cryptographic hash IDs against hashes in a hashdb block hash database"; -+ sp.info->description = desc; - sp.info->flags = scanner_info::SCANNER_DISABLED; - - // hashdb_mode -@@ -97,60 +199,52 @@ - - // hashdb_block_size - sp.info->get_config("hashdb_block_size", &hashdb_block_size, -- "Hash block size, in bytes, used to generate hashes"); -+ "Selects the block size to hash, in bytes."); - -- // hashdb_ignore_empty_blocks -- sp.info->get_config("hashdb_ignore_empty_blocks", &hashdb_ignore_empty_blocks, -- "Selects to ignore empty blocks."); -- -- // hashdb_scan_path_or_socket -- std::stringstream ss_hashdb_scan_path_or_socket; -- ss_hashdb_scan_path_or_socket -- << "File path to a hash database or\n" -- << " socket to a hashdb server to scan against. Valid only in scan mode."; -- sp.info->get_config("hashdb_scan_path_or_socket", &hashdb_scan_path_or_socket, -- ss_hashdb_scan_path_or_socket.str()); -- -- // hashdb_scan_sector_size -- std::stringstream ss_hashdb_scan_sector_size; -- ss_hashdb_scan_sector_size -- << "Selects the scan sector size. Scans along\n" -- << " sector boundaries. Valid only in scan mode."; -- sp.info->get_config("hashdb_scan_sector_size", &hashdb_scan_sector_size, -- ss_hashdb_scan_sector_size.str()); -- -- // hashdb_import_sector_size -- std::stringstream ss_hashdb_import_sector_size; -- ss_hashdb_import_sector_size -- << "Selects the import sector size. Imports along\n" -- << " sector boundaries. Valid only in import mode."; -- sp.info->get_config("hashdb_import_sector_size", &hashdb_import_sector_size, -- ss_hashdb_import_sector_size.str()); -+ // hashdb_step_size -+ std::stringstream ss_hashdb_step_size; -+ ss_hashdb_step_size -+ << "Selects the step size. 
Scans and imports along\n" -+ << " this step value."; -+ sp.info->get_config("hashdb_step_size", &hashdb_step_size, -+ ss_hashdb_step_size.str()); -+ -+ -+ // hashdb_scan_path -+ std::stringstream ss_hashdb_scan_path; -+ ss_hashdb_scan_path -+ << "File path to a hash database to scan against.\n" -+ << " Valid only in scan mode."; -+ sp.info->get_config("hashdb_scan_path", &hashdb_scan_path, -+ ss_hashdb_scan_path.str()); - -- // hashdb_import_repository_name -+ // hashdb_repository_name - std::stringstream ss_hashdb_import_repository_name; - ss_hashdb_import_repository_name - << "Sets the repository name to\n" - << " attribute the import to. Valid only in import mode."; -- sp.info->get_config("hashdb_import_repository_name", -- &hashdb_import_repository_name, -+ sp.info->get_config("hashdb_repository_name", -+ &hashdb_repository_name, - ss_hashdb_import_repository_name.str()); - -- // hashdb_import_max_duplicates -- std::stringstream ss_hashdb_import_max_duplicates; -- ss_hashdb_import_max_duplicates -- << "The maximum number of duplicates to import\n" -- << " for a given hash value, or 0 for no limit. Valid only in import mode."; -- sp.info->get_config("hashdb_import_max_duplicates", &hashdb_import_max_duplicates, -- ss_hashdb_import_max_duplicates.str()); -- -- - // configure the feature file to accept scan features - // but only if in scan mode - if (hashdb_mode == "scan") { - sp.info->feature_names.insert("identified_blocks"); -+#ifdef DEBUG_V2_OUT -+ sp.info->feature_names.insert("identified_blocks2"); -+#endif - } - -+ // hashdb_max_feature_file_lines -+ std::stringstream ss_hashdb_max_feature_file_lines; -+ ss_hashdb_max_feature_file_lines -+ << "The maximum number of features lines to record\n" -+ << " or 0 for no limit. Valid only in scan mode."; -+ sp.info->get_config("hashdb_max_feature_file_lines", &hashdb_max_feature_file_lines, -+ ss_hashdb_max_feature_file_lines.str()); -+ -+ - return; - } - -@@ -168,62 +262,27 @@ - } else { - // bad mode - std::cerr << "Error. Parameter 'hashdb_mode' value '" -- << hashdb_mode << "' is invalid.\n" -+ << hashdb_mode << "' must be [none|import|scan].\n" - << "Cannot continue.\n"; - exit(1); - } - -- // hashdb_ignore_empty_blocks -- // checks not performed -- - // hashdb_block_size - if (hashdb_block_size == 0) { - std::cerr << "Error. Value for parameter 'hashdb_block_size' is invalid.\n" -- << "Cannot continue.\n"; -- exit(1); -- } -- -- // hashdb_scan_path_or_socket -- // checks not performed -- -- // hashdb_scan_sector_size -- if (hashdb_scan_sector_size == 0) { -- std::cerr << "Error. Value for parameter 'hashdb_scan_sector_size' is invalid.\n" -- << "Cannot continue.\n"; -- exit(1); -- } -- -- // for valid operation, scan sectors must align on hash block boundaries -- if (mode == MODE_SCAN && hashdb_block_size % hashdb_scan_sector_size != 0) { -- std::cerr << "Error: invalid hashdb block size=" << hashdb_block_size -- << " or hashdb scan sector size=" << hashdb_scan_sector_size << ".\n" -- << "Sectors must align on hash block boundaries.\n" -- << "Specifically, hashdb_block_size \% hashdb_scan_sector_size must be zero.\n" -- << "Cannot continue.\n"; -- exit(1); -- } -- -- // hashdb_import_sector_size -- if (hashdb_import_sector_size == 0) { -- std::cerr << "Error. 
Value for parameter 'hashdb_import_sector_size' is invalid.\n" - << "Cannot continue.\n"; - exit(1); - } - -- // for valid operation, import sectors must align on hash block boundaries -- if (mode == MODE_IMPORT && hashdb_block_size % hashdb_import_sector_size != 0) { -- std::cerr << "Error: invalid hashdb block size=" << hashdb_block_size -- << " or hashdb import sector size=" << hashdb_import_sector_size << ".\n" -- << "Sectors must align on hash block boundaries.\n" -- << "Specifically, hashdb_block_size \% hashdb_import_sector_size must be zero.\n" -+ // hashdb_step_size -+ if (hashdb_step_size == 0) { -+ std::cerr << "Error. Value for parameter 'hashdb_step_size' is invalid.\n" - << "Cannot continue.\n"; - exit(1); - } - -- // hashdb_import_repository_name -- // checks not performed -- // hashdb_import_max_duplicates -- // checks not performed -+ // indicate hashdb version -+ std::cout << "hashdb: hashdb_version=" << hashdb_version() << "\n"; - - // perform setup based on mode - switch(mode) { -@@ -231,40 +290,49 @@ - // set the path to the hashdb - hashdb_dir = sp.fs.get_outdir() + "/" + "hashdb.hdb"; - -- // create the new hashdb manager for importing -- // currently, hashdb_dir is required to not exist -- hashdb = new hashdb_t(hashdb_dir, -- hashdb_block_size, -- hashdb_import_max_duplicates); -- -- // show relavent settable options -- std::string temp1((hashdb_ignore_empty_blocks) ? "YES" : "NO"); -+ // show relevant settable options - std::cout << "hashdb: hashdb_mode=" << hashdb_mode << "\n" - << "hashdb: hashdb_block_size=" << hashdb_block_size << "\n" -- << "hashdb: hashdb_ignore_empty_blocks=" << temp1 << "\n" -- << "hashdb: hashdb_import_sector_size= " << hashdb_import_sector_size << "\n" -- << "hashdb: hashdb_import_repository_name= " << hashdb_import_repository_name << "\n" -- << "hashdb: hashdb_import_max_duplicates=" << hashdb_import_max_duplicates << "\n" -+ << "hashdb: hashdb_step_size= " << hashdb_step_size << "\n" -+ << "hashdb: hashdb_repository_name= " << hashdb_repository_name << "\n" - << "hashdb: Creating hashdb directory " << hashdb_dir << "\n"; -+ -+ // open hashdb for importing -+ // currently, hashdb_dir is required to not exist -+ hashdb::settings_t settings; -+ settings.block_size = hashdb_block_size; -+ std::string error_message = hashdb::create_hashdb(hashdb_dir, settings, ""); -+ if (error_message.size() != 0) { -+ std::cerr << "Error: " << error_message << "\n"; -+ exit(1); -+ } -+ import_manager = new hashdb::import_manager_t(hashdb_dir, ""); - return; - } - - case MODE_SCAN: { -- // show relavent settable options -- std::string temp2((hashdb_ignore_empty_blocks) ? 
"YES" : "NO"); -+ // show relevant settable options - std::cout << "hashdb: hashdb_mode=" << hashdb_mode << "\n" - << "hashdb: hashdb_block_size=" << hashdb_block_size << "\n" -- << "hashdb: hashdb_ignore_empty_blocks=" << temp2 << "\n" -- << "hashdb: hashdb_scan_path_or_socket=" << hashdb_scan_path_or_socket << "\n" -- << "hashdb: hashdb_scan_sector_size=" << hashdb_scan_sector_size << "\n"; -+ << "hashdb: hashdb_step_size= " << hashdb_step_size << "\n" -+ << "hashdb: hashdb_scan_path=" << hashdb_scan_path << "\n" -+ << "hashdb: hashdb_max_feature_file_lines=" << hashdb_max_feature_file_lines -+ << "\n"; -+ -+ // open hashdb for scanning -+ scan_manager = new hashdb::scan_manager_t(hashdb_scan_path); -+ -+ // set the feature recorder to leave context alone but fix invalid utf8 -+ sp.fs.get_name("identified_blocks")->set_flag(feature_recorder::FLAG_XML); -+#ifdef DEBUG_V2_OUT -+ sp.fs.get_name("identified_blocks2")->set_flag(feature_recorder::FLAG_XML); -+#endif - -- // open the hashdb manager for scanning -- hashdb = new hashdb_t(hashdb_scan_path_or_socket); - return; - } - - case MODE_NONE: { -- // show relavent settable options -+ // show relevant settable options - std::cout << "hashdb: hashdb_mode=" << hashdb_mode << "\n" - << "WARNING: the hashdb scanner is enabled but it will not perform any action\n" - << "because no mode has been selected. Please either select a hashdb mode or\n" -@@ -285,7 +353,7 @@ - case scanner_params::PHASE_SCAN: { - switch(mode) { - case MODE_IMPORT: -- do_import(sp, rcb); -+ do_import(sp, rcb); - return; - - case MODE_SCAN: -@@ -301,14 +369,17 @@ - // shutdown - case scanner_params::PHASE_SHUTDOWN: { - switch(mode) { -- case MODE_SCAN: -- delete hashdb; -- return; - case MODE_IMPORT: -- delete hashdb; -- return; -+ delete import_manager; -+ return; -+ -+ case MODE_SCAN: -+ delete scan_manager; -+ return; - default: -- return; -+ // the user should have just left the scanner disabled. -+ // no action. -+ return; - } - } - -@@ -327,170 +398,154 @@ - // get the sbuf - const sbuf_t& sbuf = sp.sbuf; - -- // there should be at least one block to process -- if (sbuf.pagesize < hashdb_block_size) { -- return; -- } -- -- // get count of blocks to process -- size_t count = sbuf.bufsize / hashdb_import_sector_size; -- while ((count * hashdb_import_sector_size) + -- (hashdb_block_size - hashdb_import_sector_size) > sbuf.pagesize) { -- --count; -- } -- -- // allocate space on heap for import_input -- std::vector* import_input = -- new std::vector; -+ // get the filename from sbuf without the sbuf map file delimiter -+ std::string path_without_map_file_delimiter = -+ (sbuf.pos0.path.size() > 4) ? 
-+ std::string(sbuf.pos0.path, 0, sbuf.pos0.path.size() - 4) : ""; -+ -+ // get the filename to use as the source filename -+ std::stringstream ss; -+ const size_t p=sbuf.pos0.path.find('/'); -+ if (p==std::string::npos) { -+ // no directory in forensic path so explicitly include the filename -+ ss << sp.fs.get_input_fname(); -+ if (sbuf.pos0.isRecursive()) { -+ // forensic path is recursive so add "/" + forensic path -+ ss << "/" << path_without_map_file_delimiter; -+ } -+ } else { -+ // directory in forensic path so print forensic path as is -+ ss << path_without_map_file_delimiter; -+ } -+ std::string source_filename = ss.str(); -+ -+ // calculate the file hash using the sbuf page -+ const md5_t sbuf_hash = hash_generator::hash_buf(sbuf.buf, sbuf.pagesize); -+ const std::string file_binary_hash = -+ std::string(reinterpret_cast(sbuf_hash.digest), 16); -+ -+ // track count values -+ size_t zero_count = 0; -+ size_t nonprobative_count = 0; - -- // import all the cryptograph hash values from all the blocks in sbuf -- for (size_t i=0; i < count; ++i) { -+ // import the cryptograph hash values from all the blocks in sbuf -+ for (size_t offset=0; offset(hash.digest), 16); -+ -+ // put together any block classification labels -+ // set flags based on specific tests on the block -+ // Construct an sbuf from the block and subject it to the other tests -+ const sbuf_t s(sbuf, offset, hashdb_block_size); -+ std::stringstream ss_flags; -+ if (ramp_trait(s)) ss_flags << "R"; -+ if (hist_trait(s)) ss_flags << "H"; -+ if (whitespace_trait(s)) ss_flags << "W"; -+ if (monotonic_trait(s)) ss_flags << "M"; -+ -+ // NOTE: shannon16 is Disabled because its results were not useful -+ // and because it needs fixed to not generate sbuf read exception. -+ //if (ss_flags.str().size() > 0) ss_flags << "," << shannon16(s); -+ -+ // flags means nonprobative -+ if (ss_flags.str().size() > 0) { -+ ++nonprobative_count; - } - -- // calculate the offset from the start of the media image -- uint64_t image_offset = sbuf.pos0.offset + offset; -- -- // create and add the import element to the import input -- import_input->push_back(hashdb_t::import_element_t( -- hash, -- hashdb_import_repository_name, -- ss.str(), -- image_offset)); -- } -- -- // perform the import -- int status = hashdb->import(*import_input); -- -- if (status != 0) { -- std::cerr << "scan_hashdb import failure\n"; -- } -- -- // clean up -- delete import_input; -+ // import the hash -+ import_manager->insert_hash(binary_hash, -+ 0, // entropy -+ ss_flags.str(), -+ file_binary_hash); -+ } -+ -+ // insert the source name pair -+ import_manager->insert_source_name(file_binary_hash, -+ hashdb_repository_name, source_filename); -+ -+ // insert the source data -+ import_manager->insert_source_data(file_binary_hash, -+ sbuf.pagesize, -+ "", // file type -+ zero_count, -+ nonprobative_count); - } - - // perform scan - static void do_scan(const class scanner_params &sp, - const recursion_control_block &rcb) { - -+ // get the feature recorder -+ feature_recorder* identified_blocks_recorder = sp.fs.get_name("identified_blocks"); -+#ifdef DEBUG_V2_OUT -+ feature_recorder* identified_blocks_recorder2 = sp.fs.get_name("identified_blocks2"); -+#endif -+ - // get the sbuf - const sbuf_t& sbuf = sp.sbuf; - -- // there should be at least one block to process -- if (sbuf.pagesize < hashdb_block_size) { -- return; -- } -+ // process cryptographic hash values for blocks along sector boundaries -+ for (size_t offset=0; offset sbuf.pagesize) { -- --count; -- } -- -- // allocate 
space on heap for scan_input -- std::vector* scan_input = new std::vector; -- -- // allocate space on heap for the offset lookup table -- std::vector* offset_lookup_table = new std::vector; -- -- // get the cryptograph hash values of all the blocks along -- // sector boundaries from sbuf -- for (size_t i=0; i 0 && identified_blocks_recorder->count() >= -+ hashdb_max_feature_file_lines) { -+ break; -+ } - -- // calculate the offset associated with this index -- size_t offset = i * hashdb_scan_sector_size; -+ // Create a child sbuf of the block -+ const sbuf_t sbuf_to_hash(sbuf, offset, hashdb_block_size); - - // ignore empty blocks -- if (hashdb_ignore_empty_blocks && is_empty_block(sbuf.buf + offset)) { -+ if (empty_sbuf(sbuf_to_hash)){ - continue; - } - -- // add the offset to the offset lookup table -- offset_lookup_table->push_back(offset); -- -- // calculate and add the hash to the scan input -- scan_input->push_back(hash_generator::hash_buf( -- sbuf.buf + offset, hashdb_block_size)); -- } -- -- // allocate space on heap for scan_output -- hashdb_t::scan_output_t* scan_output = new hashdb_t::scan_output_t; -- -- // perform the scan -- int status = hashdb->scan(*scan_input, *scan_output); -- -- if (status != 0) { -- std::cerr << "Error: scan_hashdb scan failure. Aborting.\n"; -- exit(1); -- } -- -- // get the feature recorder -- feature_recorder* identified_blocks_recorder = sp.fs.get_name("identified_blocks"); -+ // calculate the hash for this sector-aligned hash block -+ const md5_t hash = hash_one_block(sbuf_to_hash); -+ const std::string binary_hash = -+ std::string(reinterpret_cast(hash.digest), 16); -+ -+ // scan for the hash -+ std::string json_text = scan_manager->find_hash_json( -+ hashdb::scan_mode_t::EXPANDED_OPTIMIZED, binary_hash); -+ -+ if (json_text.size() == 0) { -+ // hash not found -+ continue; -+ } - -- // record each feature returned in the response -- for (hashdb_t::scan_output_t::const_iterator it=scan_output->begin(); it!= scan_output->end(); ++it) { -+ // prepare fields to record the feature - -- // prepare forensic path (pos0, feature, context) -- // as (pos0, hash_string, count_string) -+ // get hash_string from hash -+ std::string hash_string = hash.hexdigest(); - -- // pos0 -- pos0_t pos0 = sbuf.pos0 + offset_lookup_table->at(it->first); -+ // record the feature, there is no context field -+ identified_blocks_recorder->write(sbuf.pos0+offset, hash_string, json_text); - -- // hash_string -- std::string hash_string = scan_input->at(it->first).hexdigest(); -+#ifdef DEBUG_V2_OUT -+ size_t count = scan_manager->find_hash_count(binary_hash); - -- // count -+ // build context field - std::stringstream ss; -- ss << it->second; -- std::string count_string = ss.str(); -+ ss << "{\"count\":" << count << "}"; - - // record the feature -- identified_blocks_recorder->write(pos0, hash_string, count_string); -- } -- -- // clean up -- delete scan_input; -- delete offset_lookup_table; -- delete scan_output; --} -+ identified_blocks_recorder2->write(sbuf.pos0+offset, hash_string, ss.str()); -+#endif - --// detect if block is empty --inline bool is_empty_block(const uint8_t *buf) { -- for (size_t i=1; i/dev/null 2>&1 ; then break; fi - done - BOOST_LDFLAGS="-L$ac_boost_path_tmp/$libsubdir" -- BOOST_CPPFLAGS="-isystem$ac_boost_path_tmp/include" -+ BOOST_CPPFLAGS="-I$ac_boost_path_tmp/include" - break; - fi - done -@@ -179,7 +179,7 @@ - _version=$_version_tmp - fi - VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` -- 
BOOST_CPPFLAGS="-isystem$ac_boost_path/include/boost-$VERSION_UNDERSCORE" -+ BOOST_CPPFLAGS="-I$ac_boost_path/include/boost-$VERSION_UNDERSCORE" - done - fi - else -@@ -202,7 +202,7 @@ - done - - VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` -- BOOST_CPPFLAGS="-isystem$best_path/include/boost-$VERSION_UNDERSCORE" -+ BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE" - if test "$ac_boost_lib_path" = ""; then - for libsubdir in $libsubdirs ; do - if ls "$best_path/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi -@@ -221,7 +221,7 @@ - V_CHECK=`expr $stage_version_shorten \>\= $_version` - if test "$V_CHECK" = "1" -a "$ac_boost_lib_path" = "" ; then - AC_MSG_NOTICE(We will use a staged boost library from $BOOST_ROOT) -- BOOST_CPPFLAGS="-isystem$BOOST_ROOT" -+ BOOST_CPPFLAGS="-I$BOOST_ROOT" - BOOST_LDFLAGS="-L$BOOST_ROOT/stage/$libsubdir" - fi - fi diff --git a/app-forensics/bulk_extractor/files/bulk_extractor-2.0.3_uint32_t.patch b/app-forensics/bulk_extractor/files/bulk_extractor-2.0.3_uint32_t.patch new file mode 100644 index 000000000..ebbc9dc60 --- /dev/null +++ b/app-forensics/bulk_extractor/files/bulk_extractor-2.0.3_uint32_t.patch @@ -0,0 +1,12 @@ +diff --git a/src/be20_api/unicode_escape.h b/src/be20_api/unicode_escape.h +--- a/src/be20_api/unicode_escape.h ++++ b/src/be20_api/unicode_escape.h +@@ -8,7 +8,7 @@ + #define UNICODE_ESCAPE_H + + #include +-//#include ++#include + #include + #include + #include diff --git a/dev-libs/hashdb/hashdb-3.1.0-r1.ebuild b/dev-libs/hashdb/hashdb-3.1.0-r1.ebuild index 3bf32591f..3faa4da9b 100644 --- a/dev-libs/hashdb/hashdb-3.1.0-r1.ebuild +++ b/dev-libs/hashdb/hashdb-3.1.0-r1.ebuild @@ -1,4 +1,4 @@ -# Copyright 1999-2020 Gentoo Authors +# Copyright 1999-2023 Gentoo Authors # Distributed under the terms of the GNU General Public License v2 EAPI=7 @@ -11,12 +11,13 @@ DESCRIPTION="The hashdb block hash database tool and API" HOMEPAGE="https://github.com/NPS-DEEP/hashdb" SRC_URI="https://github.com/NPS-DEEP/hashdb/archive/v${PV}.tar.gz -> ${P}.tar.gz" -KEYWORDS="~amd64 ~hppa ~ppc ~s390 ~sparc ~x86 ~amd64-linux ~x86-linux ~ppc-macos" +KEYWORDS="amd64 ~arm64 ~x86" LICENSE="GPL-3 public-domain" SLOT="0" IUSE="python static-libs test" REQUIRED_USE="python? ( ${PYTHON_REQUIRED_USE} )" +RESTRICT="!test? ( test )" RDEPEND=" app-forensics/libewf