Anton Bolshakov 2023-06-07 11:00:19 +08:00
parent 3c7cf94420
commit b8361a152f
No known key found for this signature in database
GPG key ID: 32BDCED870788F04
9 changed files with 133 additions and 935 deletions

View file

@@ -0,0 +1 @@
DIST bulk_extractor-2.0.3.tar.gz 8456967 BLAKE2B b8184c24dfc1ba9004f44f19a118cb84a9938f1aaf60663fe0bb259045ca4fe86ab339f8a265a71463fdc7e09b7a2f42989990c863f0888f5a2b4f80bd791677 SHA512 e1554f7f9863122ccd7405a5ec713fb3a09eed8e45db4c0c9580e8e914f1a477664683109c2b05e80a5dab169db8aa12ec8d0a49d8a959dc4ab622c11e0612f5

View file

@@ -0,0 +1,116 @@
# Copyright 1999-2023 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
EAPI=7
inherit autotools desktop eutils xdg-utils
DESCRIPTION="Scans a disk image for regular expressions and other content"
HOMEPAGE="https://github.com/simsong/bulk_extractor"
SRC_URI="https://digitalcorpora.s3.amazonaws.com/downloads/bulk_extractor/${P}.tar.gz"
LICENSE="GPL-2"
SLOT="0"
KEYWORDS="amd64 ~x86"
# fails to compile with ewf
# fails to compile with exiv2
# fails to compile without rar
IUSE="aff doc beviewer exiv2 hashdb +rar"
# ewf? ( app-forensics/libewf )
RDEPEND="
aff? ( app-forensics/afflib )
dev-libs/boost
dev-libs/expat
dev-libs/openssl:0=
dev-db/sqlite:3
dev-libs/libxml2
exiv2? ( media-gfx/exiv2 )
sys-libs/zlib
hashdb? ( dev-libs/hashdb )
beviewer? (
|| ( virtual/jre:* virtual/jdk:* )
)"
DEPEND="${RDEPEND}
doc? ( app-doc/doxygen )
virtual/man"
BDEPEND="
sys-devel/flex
virtual/pkgconfig"
src_prepare() {
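# restore the <cstdint> include in be20_api so uint32_t is declared with newer toolchains (patch added by this commit)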
eapply "${FILESDIR}/bulk_extractor-2.0.3_uint32_t.patch"
if [[ ${PV} != *9999 ]]; then
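# keep the version reported by AC_INIT in sync with ${PV} for release tarballs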
sed -e "s/AC_INIT(BULK_EXTRACTOR, \(.*\),/AC_INIT(BULK_EXTRACTOR, ${PV},/" \
-i configure.ac || die
fi
eautoreconf
default
}
src_configure() {
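# --disable-o3 leaves optimization to the user's CFLAGS; libewf stays hard-disabled until it compiles again (see the commented switches below)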
econf \
--disable-o3 \
--disable-libewf
# $(use ewf || echo "--disable-libewf")
# $(use beviewer || echo "--disable-BEViewer") \
# $(use exiv2 && echo "--enable-exiv2") \
# $(use aff || echo "--disable-afflib") \
# $(use hashdb || echo "--disable-hashdb") \
# $(use rar || echo "--disable-rar" )
}
src_install() {
dobin src/${PN}
doman man/*.1
dodoc AUTHORS ChangeLog NEWS README.md
if use doc ; then
pushd doc/doxygen >/dev/null || die
doxygen || die "doxygen failed"
popd >/dev/null || die
dodoc -r \
doc/doxygen/html \
doc/Diagnostics_Notes \
doc/announce \
doc/*.{pdf,txt,md} \
doc/programmer_manual/*.pdf
fi
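# BEViewer install steps are kept for reference; the 2.0.x tarball appears to no longer ship java_gui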
# if use beviewer; then
# local bev_dir="/opt/beviewer-${PV}"
# insinto "${bev_dir}"
# doins java_gui/BEViewer.jar
# insinto /usr/share/pixmaps
# newins java_gui/icons/24/run-build-install.png ${PN}.png
# make_wrapper "beviewer" \
# "/usr/bin/java -Xmx1g -jar \"${bev_dir}/BEViewer.jar\""
# make_desktop_entry \
# "beviewer" \
# "BEViewer (bulk_extractor)" \
# "${PN}" "Utility"
# fi
}
#pkg_postinst() {
# if use beviewer; then
# xdg_icon_cache_update
# xdg_desktop_database_update
# fi
#}
#pkg_postrm() {
# if use beviewer; then
# xdg_icon_cache_update
# xdg_desktop_database_update
# fi
#}

View file

@ -15,6 +15,7 @@ if [[ ${PV} != *9999 ]]; then
#EGIT_COMMIT="8563614408834087f242297813de9f75bdc9bedc"
EGIT_OVERRIDE_COMMIT_SIMSONG_BULK_EXTRACTOR="v2.0.3"
# EGIT_OVERRIDE_COMMIT_SIMSONG_BE20_API="f6d985f4d5f8228c1000c268911ad0cd97daedf1"
# EGIT_OVERRIDE_COMMIT_DFXML_WORKING_GROUP_DFXML_CPP="a283c888b4bb84b3dab937928f9495290a5a8a47"
# EGIT_OVERRIDE_COMMIT_NEMTRIF_UTFCPP="2ad995746bf1731d5e21cde47c9c3deff56bdbc2"
KEYWORDS="amd64 ~x86"

View file

@@ -1,116 +0,0 @@
# Copyright 1999-2022 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
EAPI=7
inherit autotools desktop eutils git-r3 xdg-utils
DESCRIPTION="Scans a disk image for regular expressions and other content"
HOMEPAGE="https://github.com/simsong/bulk_extractor"
# Please check the ".gitmodules" file upstream before bumping this
EGIT_REPO_URI="https://github.com/simsong/bulk_extractor"
if [[ ${PV} != *9999 ]]; then
EGIT_COMMIT="a52b133a3c56a483caa59eb8c68634ee1648c4ec" # 20191111 release
KEYWORDS="~amd64 ~x86"
fi
LICENSE="GPL-2"
SLOT="0"
IUSE="aff doc +beviewer +ewf +exiv2 hashdb rar"
RDEPEND="
aff? ( app-forensics/afflib )
dev-libs/boost
dev-libs/expat
dev-libs/openssl:0=
dev-db/sqlite:3
dev-libs/libxml2
ewf? ( app-forensics/libewf )
exiv2? ( media-gfx/exiv2 )
sys-libs/zlib
hashdb? ( dev-libs/hashdb )
beviewer? (
|| ( virtual/jre:* virtual/jdk:* )
)"
DEPEND="${RDEPEND}
doc? ( app-doc/doxygen )
virtual/man"
BDEPEND="
sys-devel/flex
virtual/pkgconfig"
src_prepare() {
eapply "${FILESDIR}/add_exiv2-0.27_api_support.patch"
if [[ ${PV} != *9999 ]]; then
sed -e "s/AC_INIT(BULK_EXTRACTOR, \(.*\),/AC_INIT(BULK_EXTRACTOR, ${PV},/" \
-i configure.ac || die
fi
eautoreconf
default
}
src_configure() {
econf \
--without-o3 \
$(use aff || echo "--disable-afflib") \
$(use beviewer || echo "--disable-BEViewer") \
$(use ewf || echo "--disable-libewf") \
$(use exiv2 && echo "--enable-exiv2") \
$(use hashdb || echo "--disable-hashdb") \
$(use rar || echo "--disable-rar" )
}
src_install() {
dobin src/${PN} plugins/plugin_test
doman man/*.1
dodoc AUTHORS ChangeLog NEWS README.md
if use doc ; then
pushd doc/doxygen >/dev/null || die
doxygen || die "doxygen failed"
popd >/dev/null || die
dodoc -r \
doc/doxygen/html \
doc/Diagnostics_Notes \
doc/announce \
doc/*.{pdf,txt,md} \
doc/programmer_manual/*.pdf
fi
if use beviewer; then
local bev_dir="/opt/beviewer-${PV}"
insinto "${bev_dir}"
doins java_gui/BEViewer.jar
insinto /usr/share/pixmaps
newins java_gui/icons/24/run-build-install.png ${PN}.png
make_wrapper "beviewer" \
"/usr/bin/java -Xmx1g -jar \"${bev_dir}/BEViewer.jar\""
make_desktop_entry \
"beviewer" \
"BEViewer (bulk_extractor)" \
"${PN}" "Utility"
fi
}
pkg_postinst() {
if use beviewer; then
xdg_icon_cache_update
xdg_desktop_database_update
fi
}
pkg_postrm() {
if use beviewer; then
xdg_icon_cache_update
xdg_desktop_database_update
fi
}

View file

@@ -1,24 +0,0 @@
diff -ur a/src/scan_exiv2.cpp b/src/scan_exiv2.cpp
--- a/src/scan_exiv2.cpp 2014-09-16 22:34:00.000000000 +0400
+++ b/src/scan_exiv2.cpp 2019-03-17 08:38:29.479753464 +0300
@@ -68,7 +68,7 @@
* Used for helping to convert libexiv2's GPS format to decimal lat/long
*/
-static double stod(string s)
+static double sub_stod(string s)
{
double d=0;
sscanf(s.c_str(),"%lf",&d);
@@ -78,9 +78,9 @@
static double rational(string s)
{
std::vector<std::string> parts = split(s,'/');
- if(parts.size()!=2) return stod(s); // no slash, so return without
- double top = stod(parts[0]);
- double bot = stod(parts[1]);
+ if(parts.size()!=2) return sub_stod(s); // no slash, so return without
+ double top = sub_stod(parts[0]);
+ double bot = sub_stod(parts[1]);
return bot>0 ? top / bot : top;
}

View file

@@ -1,733 +0,0 @@
diff -ur a/src/scan_hashdb.cpp b/src/scan_hashdb.cpp
--- a/src/scan_hashdb.cpp 2014-09-16 22:34:00.000000000 +0400
+++ b/src/scan_hashdb.cpp 2019-03-16 14:07:05.887464616 +0300
@@ -31,47 +31,146 @@
#ifdef HAVE_HASHDB
+//#define DEBUG_V2_OUT
+
#include "hashdb.hpp"
#include <dfxml/src/hash_t.h>
#include <iostream>
-#include <unistd.h> // for getpid
-#include <sys/types.h> // for getpid
+#include <cmath>
+#include <unistd.h> // for getpid
+#include <sys/types.h> // for getpid
// user settings
-static std::string hashdb_mode="none"; // import or scan
-static uint32_t hashdb_block_size=4096; // import or scan
-static bool hashdb_ignore_empty_blocks=true; // import or scan
-static std::string hashdb_scan_path_or_socket="your_hashdb_directory"; // scan only
-static size_t hashdb_scan_sector_size = 512; // scan only
-static size_t hashdb_import_sector_size = 4096; // import only
-static std::string hashdb_import_repository_name="default_repository"; // import only
-static uint32_t hashdb_import_max_duplicates=0; // import only
+static std::string hashdb_mode="none"; // import or scan
+static uint32_t hashdb_block_size=512; // import or scan
+static uint32_t hashdb_step_size=512; // import or scan
+static std::string hashdb_scan_path="your_hashdb_directory"; // scan only
+static std::string hashdb_repository_name="default_repository"; // import only
+static uint32_t hashdb_max_feature_file_lines=0; // scan only for feature file
// runtime modes
// scanner mode
enum mode_type_t {MODE_NONE, MODE_SCAN, MODE_IMPORT};
static mode_type_t mode = MODE_NONE;
-// internal helper functions
-static void do_import(const class scanner_params &sp,
- const recursion_control_block &rcb);
-static void do_scan(const class scanner_params &sp,
- const recursion_control_block &rcb);
-inline bool is_empty_block(const uint8_t *buf);
-
// global state
// hashdb directory, import only
static std::string hashdb_dir;
// hash type
-typedef md5_t hash_t;
typedef md5_generator hash_generator;
// hashdb manager
-typedef hashdb_t__<hash_t> hashdb_t;
-hashdb_t* hashdb;
+static hashdb::import_manager_t* import_manager;
+static hashdb::scan_manager_t* scan_manager;
+
+static void do_import(const class scanner_params &sp,
+ const recursion_control_block &rcb);
+static void do_scan(const class scanner_params &sp,
+ const recursion_control_block &rcb);
+
+
+// safely hash sbuf range without overflow failure
+inline const md5_t hash_one_block(const sbuf_t &sbuf)
+{
+ if (sbuf.bufsize >= hashdb_block_size) {
+ // hash from the beginning
+ return hash_generator::hash_buf(sbuf.buf, hashdb_block_size);
+ }
+ // hash the available part and zero-fill
+ hash_generator g;
+ g.update(sbuf.buf, sbuf.bufsize);
+
+ // hash in extra zeros to fill out the block
+ size_t extra = hashdb_block_size - sbuf.bufsize;
+ std::vector<uint8_t> zeros(extra);
+ g.update(&zeros[0], extra);
+ return g.final();
+}
+
+// rules for determining if a block should be ignored
+static bool ramp_trait(const sbuf_t &sbuf)
+{
+ if (sbuf.pagesize < 8) {
+ // not enough to process
+ return false;
+ }
+
+ uint32_t count = 0;
+ for(size_t i=0;i<sbuf.pagesize-8;i+=4){
+ // note that little endian is detected and big endian is not detected
+ if (sbuf.get32u(i)+1 == sbuf.get32u(i+4)) {
+ count += 1;
+ }
+ }
+ return count > sbuf.pagesize/8;
+}
+
+static bool hist_trait(const sbuf_t &sbuf)
+{
+ if (sbuf.pagesize < hashdb_block_size) {
+ // do not perform any histogram analysis on short blocks
+ return false;
+ }
+
+ std::map<uint32_t,uint32_t> hist;
+ for(size_t i=0;i<sbuf.pagesize-4;i+=4){
+ hist[sbuf.get32uBE(i)] += 1;
+ }
+ if (hist.size() < 3) return true;
+ for (std::map<uint32_t,uint32_t>::const_iterator it = hist.begin();it != hist.end(); it++){
+ if ((it->second) > hashdb_block_size/16){
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool whitespace_trait(const sbuf_t &sbuf)
+{
+ size_t count = 0;
+ for(size_t i=0;i<sbuf.pagesize;i++){
+ if (isspace(sbuf[i])) count+=1;
+ }
+ return count >= (sbuf.pagesize * 3)/4;
+}
+
+static bool monotonic_trait(const sbuf_t &sbuf)
+{
+ if (sbuf.pagesize < 16) {
+ // not enough data
+ return false;
+ }
+
+ const double total = sbuf.pagesize / 4.0;
+ int increasing = 0, decreasing = 0, same = 0;
+ for (size_t i=0; i+8<sbuf.pagesize; i+=4) {
+ if (sbuf.get32u(i+4) > sbuf.get32u(i)) {
+ increasing++;
+ } else if (sbuf.get32u(i+4) < sbuf.get32u(i)) {
+ decreasing++;
+ } else {
+ same++;
+ }
+ }
+ if (increasing / total >= 0.75) return true;
+ if (decreasing / total >= 0.75) return true;
+ if (same / total >= 0.75) return true;
+ return false;
+}
+
+// detect if block is all the same
+inline bool empty_sbuf(const sbuf_t &sbuf)
+{
+ for (size_t i=1; i<sbuf.bufsize; i++) {
+ if (sbuf[i] != sbuf[0]) {
+ return false;
+ }
+ }
+ return true; // all the same
+}
extern "C"
void scan_hashdb(const class scanner_params &sp,
@@ -82,9 +181,12 @@
case scanner_params::PHASE_STARTUP: {
// set properties for this scanner
+ std::string desc = "Search cryptographic hash IDs against hashes in a hashdb block hash database";
+ desc += std::string(" (hashdb version") + std::string(hashdb_version()) + std::string(")");
+
sp.info->name = "hashdb";
sp.info->author = "Bruce Allen";
- sp.info->description = "Search cryptographic hash IDs against hashes in a hashdb block hash database";
+ sp.info->description = desc;
sp.info->flags = scanner_info::SCANNER_DISABLED;
// hashdb_mode
@@ -97,60 +199,52 @@
// hashdb_block_size
sp.info->get_config("hashdb_block_size", &hashdb_block_size,
- "Hash block size, in bytes, used to generate hashes");
+ "Selects the block size to hash, in bytes.");
- // hashdb_ignore_empty_blocks
- sp.info->get_config("hashdb_ignore_empty_blocks", &hashdb_ignore_empty_blocks,
- "Selects to ignore empty blocks.");
-
- // hashdb_scan_path_or_socket
- std::stringstream ss_hashdb_scan_path_or_socket;
- ss_hashdb_scan_path_or_socket
- << "File path to a hash database or\n"
- << " socket to a hashdb server to scan against. Valid only in scan mode.";
- sp.info->get_config("hashdb_scan_path_or_socket", &hashdb_scan_path_or_socket,
- ss_hashdb_scan_path_or_socket.str());
-
- // hashdb_scan_sector_size
- std::stringstream ss_hashdb_scan_sector_size;
- ss_hashdb_scan_sector_size
- << "Selects the scan sector size. Scans along\n"
- << " sector boundaries. Valid only in scan mode.";
- sp.info->get_config("hashdb_scan_sector_size", &hashdb_scan_sector_size,
- ss_hashdb_scan_sector_size.str());
-
- // hashdb_import_sector_size
- std::stringstream ss_hashdb_import_sector_size;
- ss_hashdb_import_sector_size
- << "Selects the import sector size. Imports along\n"
- << " sector boundaries. Valid only in import mode.";
- sp.info->get_config("hashdb_import_sector_size", &hashdb_import_sector_size,
- ss_hashdb_import_sector_size.str());
+ // hashdb_step_size
+ std::stringstream ss_hashdb_step_size;
+ ss_hashdb_step_size
+ << "Selects the step size. Scans and imports along\n"
+ << " this step value.";
+ sp.info->get_config("hashdb_step_size", &hashdb_step_size,
+ ss_hashdb_step_size.str());
+
+
+ // hashdb_scan_path
+ std::stringstream ss_hashdb_scan_path;
+ ss_hashdb_scan_path
+ << "File path to a hash database to scan against.\n"
+ << " Valid only in scan mode.";
+ sp.info->get_config("hashdb_scan_path", &hashdb_scan_path,
+ ss_hashdb_scan_path.str());
- // hashdb_import_repository_name
+ // hashdb_repository_name
std::stringstream ss_hashdb_import_repository_name;
ss_hashdb_import_repository_name
<< "Sets the repository name to\n"
<< " attribute the import to. Valid only in import mode.";
- sp.info->get_config("hashdb_import_repository_name",
- &hashdb_import_repository_name,
+ sp.info->get_config("hashdb_repository_name",
+ &hashdb_repository_name,
ss_hashdb_import_repository_name.str());
- // hashdb_import_max_duplicates
- std::stringstream ss_hashdb_import_max_duplicates;
- ss_hashdb_import_max_duplicates
- << "The maximum number of duplicates to import\n"
- << " for a given hash value, or 0 for no limit. Valid only in import mode.";
- sp.info->get_config("hashdb_import_max_duplicates", &hashdb_import_max_duplicates,
- ss_hashdb_import_max_duplicates.str());
-
-
// configure the feature file to accept scan features
// but only if in scan mode
if (hashdb_mode == "scan") {
sp.info->feature_names.insert("identified_blocks");
+#ifdef DEBUG_V2_OUT
+ sp.info->feature_names.insert("identified_blocks2");
+#endif
}
+ // hashdb_max_feature_file_lines
+ std::stringstream ss_hashdb_max_feature_file_lines;
+ ss_hashdb_max_feature_file_lines
+ << "The maximum number of features lines to record\n"
+ << " or 0 for no limit. Valid only in scan mode.";
+ sp.info->get_config("hashdb_max_feature_file_lines", &hashdb_max_feature_file_lines,
+ ss_hashdb_max_feature_file_lines.str());
+
+
return;
}
@@ -168,62 +262,27 @@
} else {
// bad mode
std::cerr << "Error. Parameter 'hashdb_mode' value '"
- << hashdb_mode << "' is invalid.\n"
+ << hashdb_mode << "' must be [none|import|scan].\n"
<< "Cannot continue.\n";
exit(1);
}
- // hashdb_ignore_empty_blocks
- // checks not performed
-
// hashdb_block_size
if (hashdb_block_size == 0) {
std::cerr << "Error. Value for parameter 'hashdb_block_size' is invalid.\n"
- << "Cannot continue.\n";
- exit(1);
- }
-
- // hashdb_scan_path_or_socket
- // checks not performed
-
- // hashdb_scan_sector_size
- if (hashdb_scan_sector_size == 0) {
- std::cerr << "Error. Value for parameter 'hashdb_scan_sector_size' is invalid.\n"
- << "Cannot continue.\n";
- exit(1);
- }
-
- // for valid operation, scan sectors must align on hash block boundaries
- if (mode == MODE_SCAN && hashdb_block_size % hashdb_scan_sector_size != 0) {
- std::cerr << "Error: invalid hashdb block size=" << hashdb_block_size
- << " or hashdb scan sector size=" << hashdb_scan_sector_size << ".\n"
- << "Sectors must align on hash block boundaries.\n"
- << "Specifically, hashdb_block_size \% hashdb_scan_sector_size must be zero.\n"
- << "Cannot continue.\n";
- exit(1);
- }
-
- // hashdb_import_sector_size
- if (hashdb_import_sector_size == 0) {
- std::cerr << "Error. Value for parameter 'hashdb_import_sector_size' is invalid.\n"
<< "Cannot continue.\n";
exit(1);
}
- // for valid operation, import sectors must align on hash block boundaries
- if (mode == MODE_IMPORT && hashdb_block_size % hashdb_import_sector_size != 0) {
- std::cerr << "Error: invalid hashdb block size=" << hashdb_block_size
- << " or hashdb import sector size=" << hashdb_import_sector_size << ".\n"
- << "Sectors must align on hash block boundaries.\n"
- << "Specifically, hashdb_block_size \% hashdb_import_sector_size must be zero.\n"
+ // hashdb_step_size
+ if (hashdb_step_size == 0) {
+ std::cerr << "Error. Value for parameter 'hashdb_step_size' is invalid.\n"
<< "Cannot continue.\n";
exit(1);
}
- // hashdb_import_repository_name
- // checks not performed
- // hashdb_import_max_duplicates
- // checks not performed
+ // indicate hashdb version
+ std::cout << "hashdb: hashdb_version=" << hashdb_version() << "\n";
// perform setup based on mode
switch(mode) {
@@ -231,40 +290,49 @@
// set the path to the hashdb
hashdb_dir = sp.fs.get_outdir() + "/" + "hashdb.hdb";
- // create the new hashdb manager for importing
- // currently, hashdb_dir is required to not exist
- hashdb = new hashdb_t(hashdb_dir,
- hashdb_block_size,
- hashdb_import_max_duplicates);
-
- // show relavent settable options
- std::string temp1((hashdb_ignore_empty_blocks) ? "YES" : "NO");
+ // show relevant settable options
std::cout << "hashdb: hashdb_mode=" << hashdb_mode << "\n"
<< "hashdb: hashdb_block_size=" << hashdb_block_size << "\n"
- << "hashdb: hashdb_ignore_empty_blocks=" << temp1 << "\n"
- << "hashdb: hashdb_import_sector_size= " << hashdb_import_sector_size << "\n"
- << "hashdb: hashdb_import_repository_name= " << hashdb_import_repository_name << "\n"
- << "hashdb: hashdb_import_max_duplicates=" << hashdb_import_max_duplicates << "\n"
+ << "hashdb: hashdb_step_size= " << hashdb_step_size << "\n"
+ << "hashdb: hashdb_repository_name= " << hashdb_repository_name << "\n"
<< "hashdb: Creating hashdb directory " << hashdb_dir << "\n";
+
+ // open hashdb for importing
+ // currently, hashdb_dir is required to not exist
+ hashdb::settings_t settings;
+ settings.block_size = hashdb_block_size;
+ std::string error_message = hashdb::create_hashdb(hashdb_dir, settings, "");
+ if (error_message.size() != 0) {
+ std::cerr << "Error: " << error_message << "\n";
+ exit(1);
+ }
+ import_manager = new hashdb::import_manager_t(hashdb_dir, "");
return;
}
case MODE_SCAN: {
- // show relavent settable options
- std::string temp2((hashdb_ignore_empty_blocks) ? "YES" : "NO");
+ // show relevant settable options
std::cout << "hashdb: hashdb_mode=" << hashdb_mode << "\n"
<< "hashdb: hashdb_block_size=" << hashdb_block_size << "\n"
- << "hashdb: hashdb_ignore_empty_blocks=" << temp2 << "\n"
- << "hashdb: hashdb_scan_path_or_socket=" << hashdb_scan_path_or_socket << "\n"
- << "hashdb: hashdb_scan_sector_size=" << hashdb_scan_sector_size << "\n";
+ << "hashdb: hashdb_step_size= " << hashdb_step_size << "\n"
+ << "hashdb: hashdb_scan_path=" << hashdb_scan_path << "\n"
+ << "hashdb: hashdb_max_feature_file_lines=" << hashdb_max_feature_file_lines
+ << "\n";
+
+ // open hashdb for scanning
+ scan_manager = new hashdb::scan_manager_t(hashdb_scan_path);
+
+ // set the feature recorder to leave context alone but fix invalid utf8
+ sp.fs.get_name("identified_blocks")->set_flag(feature_recorder::FLAG_XML);
+#ifdef DEBUG_V2_OUT
+ sp.fs.get_name("identified_blocks2")->set_flag(feature_recorder::FLAG_XML);
+#endif
- // open the hashdb manager for scanning
- hashdb = new hashdb_t(hashdb_scan_path_or_socket);
return;
}
case MODE_NONE: {
- // show relavent settable options
+ // show relevant settable options
std::cout << "hashdb: hashdb_mode=" << hashdb_mode << "\n"
<< "WARNING: the hashdb scanner is enabled but it will not perform any action\n"
<< "because no mode has been selected. Please either select a hashdb mode or\n"
@@ -285,7 +353,7 @@
case scanner_params::PHASE_SCAN: {
switch(mode) {
case MODE_IMPORT:
- do_import(sp, rcb);
+ do_import(sp, rcb);
return;
case MODE_SCAN:
@@ -301,14 +369,17 @@
// shutdown
case scanner_params::PHASE_SHUTDOWN: {
switch(mode) {
- case MODE_SCAN:
- delete hashdb;
- return;
case MODE_IMPORT:
- delete hashdb;
- return;
+ delete import_manager;
+ return;
+
+ case MODE_SCAN:
+ delete scan_manager;
+ return;
default:
- return;
+ // the user should have just left the scanner disabled.
+ // no action.
+ return;
}
}
@@ -327,170 +398,154 @@
// get the sbuf
const sbuf_t& sbuf = sp.sbuf;
- // there should be at least one block to process
- if (sbuf.pagesize < hashdb_block_size) {
- return;
- }
-
- // get count of blocks to process
- size_t count = sbuf.bufsize / hashdb_import_sector_size;
- while ((count * hashdb_import_sector_size) +
- (hashdb_block_size - hashdb_import_sector_size) > sbuf.pagesize) {
- --count;
- }
-
- // allocate space on heap for import_input
- std::vector<hashdb_t::import_element_t>* import_input =
- new std::vector<hashdb_t::import_element_t>;
+ // get the filename from sbuf without the sbuf map file delimiter
+ std::string path_without_map_file_delimiter =
+ (sbuf.pos0.path.size() > 4) ?
+ std::string(sbuf.pos0.path, 0, sbuf.pos0.path.size() - 4) : "";
+
+ // get the filename to use as the source filename
+ std::stringstream ss;
+ const size_t p=sbuf.pos0.path.find('/');
+ if (p==std::string::npos) {
+ // no directory in forensic path so explicitly include the filename
+ ss << sp.fs.get_input_fname();
+ if (sbuf.pos0.isRecursive()) {
+ // forensic path is recursive so add "/" + forensic path
+ ss << "/" << path_without_map_file_delimiter;
+ }
+ } else {
+ // directory in forensic path so print forensic path as is
+ ss << path_without_map_file_delimiter;
+ }
+ std::string source_filename = ss.str();
+
+ // calculate the file hash using the sbuf page
+ const md5_t sbuf_hash = hash_generator::hash_buf(sbuf.buf, sbuf.pagesize);
+ const std::string file_binary_hash =
+ std::string(reinterpret_cast<const char*>(sbuf_hash.digest), 16);
+
+ // track count values
+ size_t zero_count = 0;
+ size_t nonprobative_count = 0;
- // import all the cryptograph hash values from all the blocks in sbuf
- for (size_t i=0; i < count; ++i) {
+ // import the cryptograph hash values from all the blocks in sbuf
+ for (size_t offset=0; offset<sbuf.pagesize; offset+=hashdb_step_size) {
- // calculate the offset associated with this index
- size_t offset = i * hashdb_import_sector_size;
+ // Create a child sbuf of what we would hash
+ const sbuf_t sbuf_to_hash(sbuf,offset,hashdb_block_size);
// ignore empty blocks
- if (hashdb_ignore_empty_blocks && is_empty_block(sbuf.buf + offset)) {
+ if (empty_sbuf(sbuf_to_hash)){
+ ++zero_count;
continue;
}
- // calculate the hash for this sector-aligned hash block
- hash_t hash = hash_generator::hash_buf(
- sbuf.buf + offset,
- hashdb_block_size);
-
- // compose the filename based on the forensic path
- std::stringstream ss;
- size_t p=sbuf.pos0.path.find('/');
- if (p==std::string::npos) {
- // no directory in forensic path so explicitly include the filename
- ss << sp.fs.get_input_fname();
- if (sbuf.pos0.isRecursive()) {
- // forensic path is recursive so add "/" + forensic path
- ss << "/" << sbuf.pos0.path;
- }
- } else {
- // directory in forensic path so print forensic path as is
- ss << sbuf.pos0.path;
+ // calculate the hash for this import-sector-aligned hash block
+ const md5_t hash = hash_one_block(sbuf_to_hash);
+ const std::string binary_hash(reinterpret_cast<const char*>(hash.digest), 16);
+
+ // put together any block classification labels
+ // set flags based on specific tests on the block
+ // Construct an sbuf from the block and subject it to the other tests
+ const sbuf_t s(sbuf, offset, hashdb_block_size);
+ std::stringstream ss_flags;
+ if (ramp_trait(s)) ss_flags << "R";
+ if (hist_trait(s)) ss_flags << "H";
+ if (whitespace_trait(s)) ss_flags << "W";
+ if (monotonic_trait(s)) ss_flags << "M";
+
+ // NOTE: shannon16 is Disabled because its results were not useful
+ // and because it needs fixed to not generate sbuf read exception.
+ //if (ss_flags.str().size() > 0) ss_flags << "," << shannon16(s);
+
+ // flags means nonprobative
+ if (ss_flags.str().size() > 0) {
+ ++nonprobative_count;
}
- // calculate the offset from the start of the media image
- uint64_t image_offset = sbuf.pos0.offset + offset;
-
- // create and add the import element to the import input
- import_input->push_back(hashdb_t::import_element_t(
- hash,
- hashdb_import_repository_name,
- ss.str(),
- image_offset));
- }
-
- // perform the import
- int status = hashdb->import(*import_input);
-
- if (status != 0) {
- std::cerr << "scan_hashdb import failure\n";
- }
-
- // clean up
- delete import_input;
+ // import the hash
+ import_manager->insert_hash(binary_hash,
+ 0, // entropy
+ ss_flags.str(),
+ file_binary_hash);
+ }
+
+ // insert the source name pair
+ import_manager->insert_source_name(file_binary_hash,
+ hashdb_repository_name, source_filename);
+
+ // insert the source data
+ import_manager->insert_source_data(file_binary_hash,
+ sbuf.pagesize,
+ "", // file type
+ zero_count,
+ nonprobative_count);
}
// perform scan
static void do_scan(const class scanner_params &sp,
const recursion_control_block &rcb) {
+ // get the feature recorder
+ feature_recorder* identified_blocks_recorder = sp.fs.get_name("identified_blocks");
+#ifdef DEBUG_V2_OUT
+ feature_recorder* identified_blocks_recorder2 = sp.fs.get_name("identified_blocks2");
+#endif
+
// get the sbuf
const sbuf_t& sbuf = sp.sbuf;
- // there should be at least one block to process
- if (sbuf.pagesize < hashdb_block_size) {
- return;
- }
+ // process cryptographic hash values for blocks along sector boundaries
+ for (size_t offset=0; offset<sbuf.pagesize; offset+=hashdb_step_size) {
- // get count of blocks to process
- size_t count = sbuf.bufsize / hashdb_scan_sector_size;
- while ((count * hashdb_scan_sector_size) +
- (hashdb_block_size - hashdb_scan_sector_size) > sbuf.pagesize) {
- --count;
- }
-
- // allocate space on heap for scan_input
- std::vector<hash_t>* scan_input = new std::vector<hash_t>;
-
- // allocate space on heap for the offset lookup table
- std::vector<uint32_t>* offset_lookup_table = new std::vector<uint32_t>;
-
- // get the cryptograph hash values of all the blocks along
- // sector boundaries from sbuf
- for (size_t i=0; i<count; ++i) {
+ // stop recording if feature file line count is at requested max
+ if (hashdb_max_feature_file_lines > 0 && identified_blocks_recorder->count() >=
+ hashdb_max_feature_file_lines) {
+ break;
+ }
- // calculate the offset associated with this index
- size_t offset = i * hashdb_scan_sector_size;
+ // Create a child sbuf of the block
+ const sbuf_t sbuf_to_hash(sbuf, offset, hashdb_block_size);
// ignore empty blocks
- if (hashdb_ignore_empty_blocks && is_empty_block(sbuf.buf + offset)) {
+ if (empty_sbuf(sbuf_to_hash)){
continue;
}
- // add the offset to the offset lookup table
- offset_lookup_table->push_back(offset);
-
- // calculate and add the hash to the scan input
- scan_input->push_back(hash_generator::hash_buf(
- sbuf.buf + offset, hashdb_block_size));
- }
-
- // allocate space on heap for scan_output
- hashdb_t::scan_output_t* scan_output = new hashdb_t::scan_output_t;
-
- // perform the scan
- int status = hashdb->scan(*scan_input, *scan_output);
-
- if (status != 0) {
- std::cerr << "Error: scan_hashdb scan failure. Aborting.\n";
- exit(1);
- }
-
- // get the feature recorder
- feature_recorder* identified_blocks_recorder = sp.fs.get_name("identified_blocks");
+ // calculate the hash for this sector-aligned hash block
+ const md5_t hash = hash_one_block(sbuf_to_hash);
+ const std::string binary_hash =
+ std::string(reinterpret_cast<const char*>(hash.digest), 16);
+
+ // scan for the hash
+ std::string json_text = scan_manager->find_hash_json(
+ hashdb::scan_mode_t::EXPANDED_OPTIMIZED, binary_hash);
+
+ if (json_text.size() == 0) {
+ // hash not found
+ continue;
+ }
- // record each feature returned in the response
- for (hashdb_t::scan_output_t::const_iterator it=scan_output->begin(); it!= scan_output->end(); ++it) {
+ // prepare fields to record the feature
- // prepare forensic path (pos0, feature, context)
- // as (pos0, hash_string, count_string)
+ // get hash_string from hash
+ std::string hash_string = hash.hexdigest();
- // pos0
- pos0_t pos0 = sbuf.pos0 + offset_lookup_table->at(it->first);
+ // record the feature, there is no context field
+ identified_blocks_recorder->write(sbuf.pos0+offset, hash_string, json_text);
- // hash_string
- std::string hash_string = scan_input->at(it->first).hexdigest();
+#ifdef DEBUG_V2_OUT
+ size_t count = scan_manager->find_hash_count(binary_hash);
- // count
+ // build context field
std::stringstream ss;
- ss << it->second;
- std::string count_string = ss.str();
+ ss << "{\"count\":" << count << "}";
// record the feature
- identified_blocks_recorder->write(pos0, hash_string, count_string);
- }
-
- // clean up
- delete scan_input;
- delete offset_lookup_table;
- delete scan_output;
-}
+ identified_blocks_recorder2->write(sbuf.pos0+offset, hash_string, ss.str());
+#endif
-// detect if block is empty
-inline bool is_empty_block(const uint8_t *buf) {
- for (size_t i=1; i<hashdb_block_size; i++) {
- if (buf[i] != buf[0]) {
- return false;
- }
}
- return true;
}
#endif
-

View file

@@ -1,60 +0,0 @@
diff -ur a/configure.ac b/configure.ac
--- a/configure.ac 2014-09-16 23:08:06.000000000 +0400
+++ b/configure.ac 2019-03-17 10:08:12.594871130 +0300
@@ -150,7 +150,7 @@
AC_ARG_ENABLE([flexscanners],
AS_HELP_STRING([--disable-flexscanners], [disable FLEX-based scanners]),
[],
- [AC_DEFINE(FLEXSCANNERS_ENABLED, 1, [Use FLEX-based scanners]), flexscanners='yes'])
+ [AC_DEFINE(FLEXSCANNERS_ENABLED, 1, [Use FLEX-based scanners]) flexscanners='yes'])
AM_CONDITIONAL([FLEXSCANNERS_ENABLED], [test "yes" = "$flexscanners"])
diff -ur a/m4/ax_boost_base.m4 b/m4/ax_boost_base.m4
--- a/m4/ax_boost_base.m4 2014-09-16 22:34:00.000000000 +0400
+++ b/m4/ax_boost_base.m4 2019-03-17 10:12:31.849532373 +0300
@@ -107,7 +107,7 @@
dnl this location ist chosen if boost libraries are installed with the --layout=system option
dnl or if you install boost with RPM
if test "$ac_boost_path" != ""; then
- BOOST_CPPFLAGS="-isystem$ac_boost_path/include"
+ BOOST_CPPFLAGS="-I$ac_boost_path/include"
for ac_boost_path_tmp in $libsubdirs; do
if test -d "$ac_boost_path"/"$ac_boost_path_tmp" ; then
BOOST_LDFLAGS="-L$ac_boost_path/$ac_boost_path_tmp"
@@ -126,7 +126,7 @@
if ls "$ac_boost_path_tmp/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi
done
BOOST_LDFLAGS="-L$ac_boost_path_tmp/$libsubdir"
- BOOST_CPPFLAGS="-isystem$ac_boost_path_tmp/include"
+ BOOST_CPPFLAGS="-I$ac_boost_path_tmp/include"
break;
fi
done
@@ -179,7 +179,7 @@
_version=$_version_tmp
fi
VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'`
- BOOST_CPPFLAGS="-isystem$ac_boost_path/include/boost-$VERSION_UNDERSCORE"
+ BOOST_CPPFLAGS="-I$ac_boost_path/include/boost-$VERSION_UNDERSCORE"
done
fi
else
@@ -202,7 +202,7 @@
done
VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'`
- BOOST_CPPFLAGS="-isystem$best_path/include/boost-$VERSION_UNDERSCORE"
+ BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE"
if test "$ac_boost_lib_path" = ""; then
for libsubdir in $libsubdirs ; do
if ls "$best_path/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi
@@ -221,7 +221,7 @@
V_CHECK=`expr $stage_version_shorten \>\= $_version`
if test "$V_CHECK" = "1" -a "$ac_boost_lib_path" = "" ; then
AC_MSG_NOTICE(We will use a staged boost library from $BOOST_ROOT)
- BOOST_CPPFLAGS="-isystem$BOOST_ROOT"
+ BOOST_CPPFLAGS="-I$BOOST_ROOT"
BOOST_LDFLAGS="-L$BOOST_ROOT/stage/$libsubdir"
fi
fi

View file

@@ -0,0 +1,12 @@
diff --git a/src/be20_api/unicode_escape.h b/src/be20_api/unicode_escape.h
--- a/src/be20_api/unicode_escape.h
+++ b/src/be20_api/unicode_escape.h
@@ -8,7 +8,7 @@
#define UNICODE_ESCAPE_H
#include <codecvt>
-//#include <cstdint>
+#include <cstdint>
#include <cstring>
#include <cwctype>
#include <iostream>

View file

@@ -1,4 +1,4 @@
-# Copyright 1999-2020 Gentoo Authors
+# Copyright 1999-2023 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
EAPI=7
@@ -11,12 +11,13 @@ DESCRIPTION="The hashdb block hash database tool and API"
HOMEPAGE="https://github.com/NPS-DEEP/hashdb"
SRC_URI="https://github.com/NPS-DEEP/hashdb/archive/v${PV}.tar.gz -> ${P}.tar.gz"
KEYWORDS="~amd64 ~hppa ~ppc ~s390 ~sparc ~x86 ~amd64-linux ~x86-linux ~ppc-macos"
KEYWORDS="amd64 ~arm64 ~x86"
LICENSE="GPL-3 public-domain"
SLOT="0"
IUSE="python static-libs test"
REQUIRED_USE="python? ( ${PYTHON_REQUIRED_USE} )"
RESTRICT="!test? ( test )"
RDEPEND="
app-forensics/libewf