From 391a394e4217ce4e7324cf1672a49c13a687d0fe Mon Sep 17 00:00:00 2001 From: Yury Martynov Date: Sat, 16 Mar 2019 22:41:25 +0300 Subject: [PATCH] app-forensics/bulk_extractor: update ebuilds and bump to 1.6.0_p20181112 Package-Manager: Portage-2.3.62, Repoman-2.3.11 --- app-forensics/bulk_extractor/Manifest | 2 +- .../bulk_extractor-1.5.5-r1.ebuild | 115 +++ .../bulk_extractor-1.5.5.ebuild | 26 - .../bulk_extractor-1.6.0_p20181112.ebuild | 120 +++ .../bulk_extractor/bulk_extractor-9999.ebuild | 120 +++ .../files/add_exiv2-0.27.0_support.patch | 14 + ..._hashdb-3.1.0_and_old_bulk_extractor.patch | 733 ++++++++++++++++++ .../files/fix_call_of_overloaded_errors.patch | 24 + .../files/other_minor_fixes.patch | 60 ++ ...match_hashdb_source_interface_change.patch | 105 +++ app-forensics/bulk_extractor/metadata.xml | 20 + 11 files changed, 1312 insertions(+), 27 deletions(-) create mode 100644 app-forensics/bulk_extractor/bulk_extractor-1.5.5-r1.ebuild delete mode 100644 app-forensics/bulk_extractor/bulk_extractor-1.5.5.ebuild create mode 100644 app-forensics/bulk_extractor/bulk_extractor-1.6.0_p20181112.ebuild create mode 100644 app-forensics/bulk_extractor/bulk_extractor-9999.ebuild create mode 100644 app-forensics/bulk_extractor/files/add_exiv2-0.27.0_support.patch create mode 100644 app-forensics/bulk_extractor/files/bulk_extractor-1.5.5_hashdb-3.1.0_and_old_bulk_extractor.patch create mode 100644 app-forensics/bulk_extractor/files/fix_call_of_overloaded_errors.patch create mode 100644 app-forensics/bulk_extractor/files/other_minor_fixes.patch create mode 100644 app-forensics/bulk_extractor/files/update_to_match_hashdb_source_interface_change.patch create mode 100644 app-forensics/bulk_extractor/metadata.xml diff --git a/app-forensics/bulk_extractor/Manifest b/app-forensics/bulk_extractor/Manifest index 262394d7f..4a37f6d71 100644 --- a/app-forensics/bulk_extractor/Manifest +++ b/app-forensics/bulk_extractor/Manifest @@ -1 +1 @@ -DIST bulk_extractor-1.5.5.tar.gz 4473107 SHA256 297a57808c12b81b8e0d82222cf57245ad988804ab467eb0a70cf8669594e8ed SHA512 9f1384f9d3b499beecc858f78e0abd4c904dad0c64ea04bf98c1a0195b62b17be19ed2bb7b104a3c65545988c6ec729d83f75673e27690585f18b5eb468a083a WHIRLPOOL 0ad57b842f8d2902eb11321cde360f52baff00920b6ba9ebb87a7f0a002247c76138685c77d9eb8ef1853700003a0dbe5503c62fe9513f9a67650136d3a6c4bb +DIST bulk_extractor-1.5.5.tar.gz 4473107 BLAKE2B c341722c3cee2b640ab456a907a7644ccd09fe3ae3764a1fe316acedf26256a866513de5ea1575d536a1c962d86606717815c4d08cc3e7d9db2f3fbcf4bba5f0 SHA512 9f1384f9d3b499beecc858f78e0abd4c904dad0c64ea04bf98c1a0195b62b17be19ed2bb7b104a3c65545988c6ec729d83f75673e27690585f18b5eb468a083a diff --git a/app-forensics/bulk_extractor/bulk_extractor-1.5.5-r1.ebuild b/app-forensics/bulk_extractor/bulk_extractor-1.5.5-r1.ebuild new file mode 100644 index 000000000..def763e29 --- /dev/null +++ b/app-forensics/bulk_extractor/bulk_extractor-1.5.5-r1.ebuild @@ -0,0 +1,115 @@ +# Copyright 1999-2019 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=6 + +inherit autotools eutils flag-o-matic gnome2-utils xdg-utils + +DESCRIPTION="Scans a disk image for regular expressions and other content" +HOMEPAGE="https://github.com/simsong/bulk_extractor" +SRC_URI="http://digitalcorpora.org/downloads/bulk_extractor/${P}.tar.gz" +KEYWORDS="~amd64 ~x86" +LICENSE="GPL-2" +RESTRICT="mirror" +SLOT="0" +IUSE="aff doc +beviewer +exiv2 hashdb rar" + +RDEPEND=" + aff? ( app-forensics/afflib ) + beviewer? 
( virtual/jdk:* )
+	dev-libs/boost[threads]
+	dev-libs/expat
+	dev-libs/openssl:0=
+	dev-db/sqlite:3
+	dev-libs/libxml2
+	exiv2? ( >=media-gfx/exiv2-0.27.0 )
+	sys-libs/zlib
+	hashdb? ( >=dev-libs/hashdb-3.1.0 )"
+
+DEPEND="${RDEPEND}
+	doc? ( app-doc/doxygen )
+	sys-devel/flex
+	virtual/man
+	virtual/pkgconfig"
+
+src_prepare() {
+	# Add hashdb-3.1.0 support for old bulk_extractor versions
+	# https://github.com/NPS-DEEP/hashdb/wiki/hashdb-3.1.0-and-bulk_extractor
+	use hashdb && \
+		eapply "${FILESDIR}/${P}_hashdb-3.1.0_and_old_bulk_extractor.patch"
+
+	# Use -I rather than -isystem for BOOST_CPPFLAGS
+	# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70129
+	# Add exiv2-0.27.0 support and other minor fixes
+	eapply "${FILESDIR}/add_exiv2-0.27.0_support.patch"
+	eapply "${FILESDIR}/fix_call_of_overloaded_errors.patch"
+	eapply "${FILESDIR}/other_minor_fixes.patch"
+
+	eautoreconf
+	eapply_user
+}
+
+src_configure() {
+	append-cxxflags -std=c++11
+
+	# An empty substitution keeps the ./configure default (enabled);
+	# an unset USE flag echoes the matching --disable option instead.
+	econf \
+		--without-o3 \
+		--disable-libewf \
+		$(use aff || echo "--disable-afflib") \
+		$(use beviewer || echo "--disable-BEViewer") \
+		$(use exiv2 && echo "--enable-exiv2") \
+		$(use hashdb || echo "--disable-hashdb") \
+		$(use rar || echo "--disable-rar")
+}
+
+src_install() {
+	dobin src/${PN} plugins/plugin_test
+	doman man/*.1
+	dodoc AUTHORS ChangeLog NEWS README
+
+	if use doc ; then
+		pushd doc/doxygen >/dev/null || die
+		doxygen || die "doxygen failed"
+		popd >/dev/null || die
+
+		dodoc -r \
+			doc/doxygen/html \
+			doc/*.{pdf,txt,md}
+	fi
+
+	if use beviewer; then
+		local bev_dir="/opt/beviewer-${PV}"
+
+		insinto "${bev_dir}"
+		doins java_gui/BEViewer.jar
+
+		insinto /usr/share/pixmaps
+		newins java_gui/icons/24/run-build-install.png ${PN}.png
+
+		make_wrapper "beviewer" \
+			"/usr/bin/java -Xmx1g -jar \"${bev_dir}/BEViewer.jar\""
+		make_desktop_entry \
+			"beviewer" \
+			"BEViewer (bulk_extractor)" \
+			"${PN}" "Utility"
+	fi
+}
+
+pkg_preinst() {
+	use beviewer && gnome2_icon_savelist
+}
+
+pkg_postinst() {
+	if use beviewer; then
+		xdg_desktop_database_update
+		gnome2_icon_cache_update
+	fi
+}
+
+pkg_postrm() {
+	if use beviewer; then
+		xdg_desktop_database_update
+		gnome2_icon_cache_update
+	fi
+}
diff --git a/app-forensics/bulk_extractor/bulk_extractor-1.5.5.ebuild b/app-forensics/bulk_extractor/bulk_extractor-1.5.5.ebuild
deleted file mode 100644
index 1a7268a3f..000000000
--- a/app-forensics/bulk_extractor/bulk_extractor-1.5.5.ebuild
+++ /dev/null
@@ -1,26 +0,0 @@
-# Copyright 1999-2014 Gentoo Foundation
-# Distributed under the terms of the GNU General Public License v2
-# $Header: $
-
-EAPI=5
-
-DESCRIPTION="Scans a disk image, directory or file and extracts useful information"
-HOMEPAGE="http://www.forensicswiki.org/wiki/Bulk_extractor"
-SRC_URI="http://digitalcorpora.org/downloads/bulk_extractor/${P}.tar.gz"
-
-LICENSE="GPL-2"
-SLOT="0"
-KEYWORDS="~x86 ~amd64 ~arm"
-IUSE="exiv2 +ewf +aff sqlite"
-
-DEPEND="aff? ( app-forensics/afflib )
-	ewf? ( app-forensics/libewf )
-	sqlite? ( dev-db/sqlite )
-	exiv2?
( media-gfx/exiv2 ) - dev-libs/boost[threads] - dev-libs/expat - dev-libs/openssl - sys-libs/zlib" -RDEPEND="${DEPEND}" - -#DOCS=( AUTHORS ChangeLog README doc/2013.COSE.bulk_extractor.pdf doc/bulk_extractor.html ) diff --git a/app-forensics/bulk_extractor/bulk_extractor-1.6.0_p20181112.ebuild b/app-forensics/bulk_extractor/bulk_extractor-1.6.0_p20181112.ebuild new file mode 100644 index 000000000..9e6f01cf4 --- /dev/null +++ b/app-forensics/bulk_extractor/bulk_extractor-1.6.0_p20181112.ebuild @@ -0,0 +1,120 @@ +# Copyright 1999-2019 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=6 + +inherit autotools eutils git-r3 gnome2-utils xdg-utils + +DESCRIPTION="Scans a disk image for regular expressions and other content" +HOMEPAGE="https://github.com/simsong/bulk_extractor" +SRC_URI="" + +EGIT_REPO_URI="https://github.com/simsong/bulk_extractor" +if [[ ${PV} != *9999 ]]; then + #EGIT_COMMIT="${PV}" + EGIT_COMMIT="215ed3f1c0ef2dfc0a662cfbe7448ab9cbe2f511" + KEYWORDS="~amd64 ~x86" +fi + +RESTRICT="mirror" +LICENSE="GPL-2" +SLOT="0" +IUSE="aff doc +beviewer +ewf +exiv2 hashdb rar" + +RDEPEND=" + aff? ( app-forensics/afflib ) + beviewer? ( virtual/jdk:* ) + dev-libs/boost[threads] + dev-libs/expat + dev-libs/openssl:0= + dev-db/sqlite:3 + dev-libs/libxml2 + ewf? ( app-forensics/libewf ) + exiv2? ( >=media-gfx/exiv2-0.27.0 ) + sys-libs/zlib + hashdb? ( >=dev-libs/hashdb-3.1.0 )" + +DEPEND="${RDEPEND} + doc? ( app-doc/doxygen ) + sys-devel/flex + virtual/man + virtual/pkgconfig" + +src_prepare() { + # Update to the latest commit for: src/scan_hashdb.cpp + # https://github.com/simsong/bulk_extractor/blob/8bb26e4c16f543fd6c912521147615bfa48e545e/src/scan_hashdb.cpp + use hashdb && \ + eapply "${FILESDIR}/update_to_match_hashdb_source_interface_change.patch" + + # Add exiv-0.27.0 support and other minor fixes... 
+ eapply "${FILESDIR}/add_exiv2-0.27.0_support.patch" + eapply "${FILESDIR}/fix_call_of_overloaded_errors.patch" + + eautoreconf + eapply_user +} + +src_configure() { + econf \ + --without-o3 \ + $(use aff || echo "--disable-afflib") \ + $(use beviewer || echo "--disable-BEViewer") \ + $(use ewf || echo "--disable-libewf") \ + $(use exiv2 && echo "--enable-exiv2") \ + $(use hashdb || echo "--disable-hashdb") \ + $(use rar || echo "--disable-rar" ) +} + +src_install() { + dobin src/${PN} plugins/plugin_test + doman man/*.1 + dodoc AUTHORS ChangeLog NEWS README.md + + if use doc ; then + pushd doc/doxygen >/dev/null || die + doxygen || die "doxygen failed" + popd >/dev/null || die + + dodoc -r \ + doc/doxygen/html \ + doc/Diagnostics_Notes \ + doc/announce \ + doc/*.{pdf,txt,md} \ + doc/programmer_manual/*.pdf + fi + + if use beviewer; then + local bev_dir="/opt/beviewer-${PV}" + + insinto "${bev_dir}" + doins java_gui/BEViewer.jar + + insinto /usr/share/pixmaps + newins java_gui/icons/24/run-build-install.png ${PN}.png + + make_wrapper "beviewer" \ + "/usr/bin/java -Xmx1g -jar \"${bev_dir}/BEViewer.jar\"" + make_desktop_entry \ + "beviewer" \ + "BEViewer (bulk_extractor)" \ + "${PN}" "Utility" + fi +} + +pkg_preinst() { + use beviewer && gnome2_icon_savelist +} + +pkg_postinst() { + if use beviewer; then + xdg_desktop_database_update + gnome2_icon_cache_update + fi +} + +pkg_postrm() { + if use beviewer; then + xdg_desktop_database_update + gnome2_icon_cache_update + fi +} diff --git a/app-forensics/bulk_extractor/bulk_extractor-9999.ebuild b/app-forensics/bulk_extractor/bulk_extractor-9999.ebuild new file mode 100644 index 000000000..9e6f01cf4 --- /dev/null +++ b/app-forensics/bulk_extractor/bulk_extractor-9999.ebuild @@ -0,0 +1,120 @@ +# Copyright 1999-2019 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=6 + +inherit autotools eutils git-r3 gnome2-utils xdg-utils + +DESCRIPTION="Scans a disk image for regular expressions and other content" +HOMEPAGE="https://github.com/simsong/bulk_extractor" +SRC_URI="" + +EGIT_REPO_URI="https://github.com/simsong/bulk_extractor" +if [[ ${PV} != *9999 ]]; then + #EGIT_COMMIT="${PV}" + EGIT_COMMIT="215ed3f1c0ef2dfc0a662cfbe7448ab9cbe2f511" + KEYWORDS="~amd64 ~x86" +fi + +RESTRICT="mirror" +LICENSE="GPL-2" +SLOT="0" +IUSE="aff doc +beviewer +ewf +exiv2 hashdb rar" + +RDEPEND=" + aff? ( app-forensics/afflib ) + beviewer? ( virtual/jdk:* ) + dev-libs/boost[threads] + dev-libs/expat + dev-libs/openssl:0= + dev-db/sqlite:3 + dev-libs/libxml2 + ewf? ( app-forensics/libewf ) + exiv2? ( >=media-gfx/exiv2-0.27.0 ) + sys-libs/zlib + hashdb? ( >=dev-libs/hashdb-3.1.0 )" + +DEPEND="${RDEPEND} + doc? ( app-doc/doxygen ) + sys-devel/flex + virtual/man + virtual/pkgconfig" + +src_prepare() { + # Update to the latest commit for: src/scan_hashdb.cpp + # https://github.com/simsong/bulk_extractor/blob/8bb26e4c16f543fd6c912521147615bfa48e545e/src/scan_hashdb.cpp + use hashdb && \ + eapply "${FILESDIR}/update_to_match_hashdb_source_interface_change.patch" + + # Add exiv-0.27.0 support and other minor fixes... 
+ eapply "${FILESDIR}/add_exiv2-0.27.0_support.patch" + eapply "${FILESDIR}/fix_call_of_overloaded_errors.patch" + + eautoreconf + eapply_user +} + +src_configure() { + econf \ + --without-o3 \ + $(use aff || echo "--disable-afflib") \ + $(use beviewer || echo "--disable-BEViewer") \ + $(use ewf || echo "--disable-libewf") \ + $(use exiv2 && echo "--enable-exiv2") \ + $(use hashdb || echo "--disable-hashdb") \ + $(use rar || echo "--disable-rar" ) +} + +src_install() { + dobin src/${PN} plugins/plugin_test + doman man/*.1 + dodoc AUTHORS ChangeLog NEWS README.md + + if use doc ; then + pushd doc/doxygen >/dev/null || die + doxygen || die "doxygen failed" + popd >/dev/null || die + + dodoc -r \ + doc/doxygen/html \ + doc/Diagnostics_Notes \ + doc/announce \ + doc/*.{pdf,txt,md} \ + doc/programmer_manual/*.pdf + fi + + if use beviewer; then + local bev_dir="/opt/beviewer-${PV}" + + insinto "${bev_dir}" + doins java_gui/BEViewer.jar + + insinto /usr/share/pixmaps + newins java_gui/icons/24/run-build-install.png ${PN}.png + + make_wrapper "beviewer" \ + "/usr/bin/java -Xmx1g -jar \"${bev_dir}/BEViewer.jar\"" + make_desktop_entry \ + "beviewer" \ + "BEViewer (bulk_extractor)" \ + "${PN}" "Utility" + fi +} + +pkg_preinst() { + use beviewer && gnome2_icon_savelist +} + +pkg_postinst() { + if use beviewer; then + xdg_desktop_database_update + gnome2_icon_cache_update + fi +} + +pkg_postrm() { + if use beviewer; then + xdg_desktop_database_update + gnome2_icon_cache_update + fi +} diff --git a/app-forensics/bulk_extractor/files/add_exiv2-0.27.0_support.patch b/app-forensics/bulk_extractor/files/add_exiv2-0.27.0_support.patch new file mode 100644 index 000000000..63b33075a --- /dev/null +++ b/app-forensics/bulk_extractor/files/add_exiv2-0.27.0_support.patch @@ -0,0 +1,14 @@ +diff -ur a/src/dfxml/src/dfxml_writer.cpp b/src/dfxml/src/dfxml_writer.cpp +--- a/src/dfxml/src/dfxml_writer.cpp 2014-09-16 22:34:02.000000000 +0400 ++++ b/src/dfxml/src/dfxml_writer.cpp 2019-03-17 10:28:31.797180265 +0300 +@@ -646,6 +646,10 @@ + #include + #include + #include ++#include ++#if EXIV2_TEST_VERSION(0, 27, 0) ++#include ++#endif + #endif + + #ifdef HAVE_HASHDB diff --git a/app-forensics/bulk_extractor/files/bulk_extractor-1.5.5_hashdb-3.1.0_and_old_bulk_extractor.patch b/app-forensics/bulk_extractor/files/bulk_extractor-1.5.5_hashdb-3.1.0_and_old_bulk_extractor.patch new file mode 100644 index 000000000..e38721397 --- /dev/null +++ b/app-forensics/bulk_extractor/files/bulk_extractor-1.5.5_hashdb-3.1.0_and_old_bulk_extractor.patch @@ -0,0 +1,733 @@ +diff -ur a/src/scan_hashdb.cpp b/src/scan_hashdb.cpp +--- a/src/scan_hashdb.cpp 2014-09-16 22:34:00.000000000 +0400 ++++ b/src/scan_hashdb.cpp 2019-03-16 14:07:05.887464616 +0300 +@@ -31,47 +31,146 @@ + + #ifdef HAVE_HASHDB + ++//#define DEBUG_V2_OUT ++ + #include "hashdb.hpp" + #include + + #include +-#include // for getpid +-#include // for getpid ++#include ++#include // for getpid ++#include // for getpid + + // user settings +-static std::string hashdb_mode="none"; // import or scan +-static uint32_t hashdb_block_size=4096; // import or scan +-static bool hashdb_ignore_empty_blocks=true; // import or scan +-static std::string hashdb_scan_path_or_socket="your_hashdb_directory"; // scan only +-static size_t hashdb_scan_sector_size = 512; // scan only +-static size_t hashdb_import_sector_size = 4096; // import only +-static std::string hashdb_import_repository_name="default_repository"; // import only +-static uint32_t hashdb_import_max_duplicates=0; // import only 
++static std::string hashdb_mode="none"; // import or scan ++static uint32_t hashdb_block_size=512; // import or scan ++static uint32_t hashdb_step_size=512; // import or scan ++static std::string hashdb_scan_path="your_hashdb_directory"; // scan only ++static std::string hashdb_repository_name="default_repository"; // import only ++static uint32_t hashdb_max_feature_file_lines=0; // scan only for feature file + + // runtime modes + // scanner mode + enum mode_type_t {MODE_NONE, MODE_SCAN, MODE_IMPORT}; + static mode_type_t mode = MODE_NONE; + +-// internal helper functions +-static void do_import(const class scanner_params &sp, +- const recursion_control_block &rcb); +-static void do_scan(const class scanner_params &sp, +- const recursion_control_block &rcb); +-inline bool is_empty_block(const uint8_t *buf); +- + // global state + + // hashdb directory, import only + static std::string hashdb_dir; + + // hash type +-typedef md5_t hash_t; + typedef md5_generator hash_generator; + + // hashdb manager +-typedef hashdb_t__ hashdb_t; +-hashdb_t* hashdb; ++static hashdb::import_manager_t* import_manager; ++static hashdb::scan_manager_t* scan_manager; ++ ++static void do_import(const class scanner_params &sp, ++ const recursion_control_block &rcb); ++static void do_scan(const class scanner_params &sp, ++ const recursion_control_block &rcb); ++ ++ ++// safely hash sbuf range without overflow failure ++inline const md5_t hash_one_block(const sbuf_t &sbuf) ++{ ++ if (sbuf.bufsize >= hashdb_block_size) { ++ // hash from the beginning ++ return hash_generator::hash_buf(sbuf.buf, hashdb_block_size); ++ } ++ // hash the available part and zero-fill ++ hash_generator g; ++ g.update(sbuf.buf, sbuf.bufsize); ++ ++ // hash in extra zeros to fill out the block ++ size_t extra = hashdb_block_size - sbuf.bufsize; ++ std::vector zeros(extra); ++ g.update(&zeros[0], extra); ++ return g.final(); ++} ++ ++// rules for determining if a block should be ignored ++static bool ramp_trait(const sbuf_t &sbuf) ++{ ++ if (sbuf.pagesize < 8) { ++ // not enough to process ++ return false; ++ } ++ ++ uint32_t count = 0; ++ for(size_t i=0;i sbuf.pagesize/8; ++} ++ ++static bool hist_trait(const sbuf_t &sbuf) ++{ ++ if (sbuf.pagesize < hashdb_block_size) { ++ // do not perform any histogram analysis on short blocks ++ return false; ++ } ++ ++ std::map hist; ++ for(size_t i=0;i::const_iterator it = hist.begin();it != hist.end(); it++){ ++ if ((it->second) > hashdb_block_size/16){ ++ return true; ++ } ++ } ++ return false; ++} ++ ++static bool whitespace_trait(const sbuf_t &sbuf) ++{ ++ size_t count = 0; ++ for(size_t i=0;i= (sbuf.pagesize * 3)/4; ++} ++ ++static bool monotonic_trait(const sbuf_t &sbuf) ++{ ++ if (sbuf.pagesize < 16) { ++ // not enough data ++ return false; ++ } ++ ++ const double total = sbuf.pagesize / 4.0; ++ int increasing = 0, decreasing = 0, same = 0; ++ for (size_t i=0; i+8 sbuf.get32u(i)) { ++ increasing++; ++ } else if (sbuf.get32u(i+4) < sbuf.get32u(i)) { ++ decreasing++; ++ } else { ++ same++; ++ } ++ } ++ if (increasing / total >= 0.75) return true; ++ if (decreasing / total >= 0.75) return true; ++ if (same / total >= 0.75) return true; ++ return false; ++} ++ ++// detect if block is all the same ++inline bool empty_sbuf(const sbuf_t &sbuf) ++{ ++ for (size_t i=1; iname = "hashdb"; + sp.info->author = "Bruce Allen"; +- sp.info->description = "Search cryptographic hash IDs against hashes in a hashdb block hash database"; ++ sp.info->description = desc; + sp.info->flags = 
scanner_info::SCANNER_DISABLED; + + // hashdb_mode +@@ -97,60 +199,52 @@ + + // hashdb_block_size + sp.info->get_config("hashdb_block_size", &hashdb_block_size, +- "Hash block size, in bytes, used to generate hashes"); ++ "Selects the block size to hash, in bytes."); + +- // hashdb_ignore_empty_blocks +- sp.info->get_config("hashdb_ignore_empty_blocks", &hashdb_ignore_empty_blocks, +- "Selects to ignore empty blocks."); +- +- // hashdb_scan_path_or_socket +- std::stringstream ss_hashdb_scan_path_or_socket; +- ss_hashdb_scan_path_or_socket +- << "File path to a hash database or\n" +- << " socket to a hashdb server to scan against. Valid only in scan mode."; +- sp.info->get_config("hashdb_scan_path_or_socket", &hashdb_scan_path_or_socket, +- ss_hashdb_scan_path_or_socket.str()); +- +- // hashdb_scan_sector_size +- std::stringstream ss_hashdb_scan_sector_size; +- ss_hashdb_scan_sector_size +- << "Selects the scan sector size. Scans along\n" +- << " sector boundaries. Valid only in scan mode."; +- sp.info->get_config("hashdb_scan_sector_size", &hashdb_scan_sector_size, +- ss_hashdb_scan_sector_size.str()); +- +- // hashdb_import_sector_size +- std::stringstream ss_hashdb_import_sector_size; +- ss_hashdb_import_sector_size +- << "Selects the import sector size. Imports along\n" +- << " sector boundaries. Valid only in import mode."; +- sp.info->get_config("hashdb_import_sector_size", &hashdb_import_sector_size, +- ss_hashdb_import_sector_size.str()); ++ // hashdb_step_size ++ std::stringstream ss_hashdb_step_size; ++ ss_hashdb_step_size ++ << "Selects the step size. Scans and imports along\n" ++ << " this step value."; ++ sp.info->get_config("hashdb_step_size", &hashdb_step_size, ++ ss_hashdb_step_size.str()); ++ ++ ++ // hashdb_scan_path ++ std::stringstream ss_hashdb_scan_path; ++ ss_hashdb_scan_path ++ << "File path to a hash database to scan against.\n" ++ << " Valid only in scan mode."; ++ sp.info->get_config("hashdb_scan_path", &hashdb_scan_path, ++ ss_hashdb_scan_path.str()); + +- // hashdb_import_repository_name ++ // hashdb_repository_name + std::stringstream ss_hashdb_import_repository_name; + ss_hashdb_import_repository_name + << "Sets the repository name to\n" + << " attribute the import to. Valid only in import mode."; +- sp.info->get_config("hashdb_import_repository_name", +- &hashdb_import_repository_name, ++ sp.info->get_config("hashdb_repository_name", ++ &hashdb_repository_name, + ss_hashdb_import_repository_name.str()); + +- // hashdb_import_max_duplicates +- std::stringstream ss_hashdb_import_max_duplicates; +- ss_hashdb_import_max_duplicates +- << "The maximum number of duplicates to import\n" +- << " for a given hash value, or 0 for no limit. Valid only in import mode."; +- sp.info->get_config("hashdb_import_max_duplicates", &hashdb_import_max_duplicates, +- ss_hashdb_import_max_duplicates.str()); +- +- + // configure the feature file to accept scan features + // but only if in scan mode + if (hashdb_mode == "scan") { + sp.info->feature_names.insert("identified_blocks"); ++#ifdef DEBUG_V2_OUT ++ sp.info->feature_names.insert("identified_blocks2"); ++#endif + } + ++ // hashdb_max_feature_file_lines ++ std::stringstream ss_hashdb_max_feature_file_lines; ++ ss_hashdb_max_feature_file_lines ++ << "The maximum number of features lines to record\n" ++ << " or 0 for no limit. 
Valid only in scan mode."; ++ sp.info->get_config("hashdb_max_feature_file_lines", &hashdb_max_feature_file_lines, ++ ss_hashdb_max_feature_file_lines.str()); ++ ++ + return; + } + +@@ -168,62 +262,27 @@ + } else { + // bad mode + std::cerr << "Error. Parameter 'hashdb_mode' value '" +- << hashdb_mode << "' is invalid.\n" ++ << hashdb_mode << "' must be [none|import|scan].\n" + << "Cannot continue.\n"; + exit(1); + } + +- // hashdb_ignore_empty_blocks +- // checks not performed +- + // hashdb_block_size + if (hashdb_block_size == 0) { + std::cerr << "Error. Value for parameter 'hashdb_block_size' is invalid.\n" +- << "Cannot continue.\n"; +- exit(1); +- } +- +- // hashdb_scan_path_or_socket +- // checks not performed +- +- // hashdb_scan_sector_size +- if (hashdb_scan_sector_size == 0) { +- std::cerr << "Error. Value for parameter 'hashdb_scan_sector_size' is invalid.\n" +- << "Cannot continue.\n"; +- exit(1); +- } +- +- // for valid operation, scan sectors must align on hash block boundaries +- if (mode == MODE_SCAN && hashdb_block_size % hashdb_scan_sector_size != 0) { +- std::cerr << "Error: invalid hashdb block size=" << hashdb_block_size +- << " or hashdb scan sector size=" << hashdb_scan_sector_size << ".\n" +- << "Sectors must align on hash block boundaries.\n" +- << "Specifically, hashdb_block_size \% hashdb_scan_sector_size must be zero.\n" +- << "Cannot continue.\n"; +- exit(1); +- } +- +- // hashdb_import_sector_size +- if (hashdb_import_sector_size == 0) { +- std::cerr << "Error. Value for parameter 'hashdb_import_sector_size' is invalid.\n" + << "Cannot continue.\n"; + exit(1); + } + +- // for valid operation, import sectors must align on hash block boundaries +- if (mode == MODE_IMPORT && hashdb_block_size % hashdb_import_sector_size != 0) { +- std::cerr << "Error: invalid hashdb block size=" << hashdb_block_size +- << " or hashdb import sector size=" << hashdb_import_sector_size << ".\n" +- << "Sectors must align on hash block boundaries.\n" +- << "Specifically, hashdb_block_size \% hashdb_import_sector_size must be zero.\n" ++ // hashdb_step_size ++ if (hashdb_step_size == 0) { ++ std::cerr << "Error. Value for parameter 'hashdb_step_size' is invalid.\n" + << "Cannot continue.\n"; + exit(1); + } + +- // hashdb_import_repository_name +- // checks not performed +- // hashdb_import_max_duplicates +- // checks not performed ++ // indicate hashdb version ++ std::cout << "hashdb: hashdb_version=" << hashdb_version() << "\n"; + + // perform setup based on mode + switch(mode) { +@@ -231,40 +290,49 @@ + // set the path to the hashdb + hashdb_dir = sp.fs.get_outdir() + "/" + "hashdb.hdb"; + +- // create the new hashdb manager for importing +- // currently, hashdb_dir is required to not exist +- hashdb = new hashdb_t(hashdb_dir, +- hashdb_block_size, +- hashdb_import_max_duplicates); +- +- // show relavent settable options +- std::string temp1((hashdb_ignore_empty_blocks) ? 
"YES" : "NO"); ++ // show relevant settable options + std::cout << "hashdb: hashdb_mode=" << hashdb_mode << "\n" + << "hashdb: hashdb_block_size=" << hashdb_block_size << "\n" +- << "hashdb: hashdb_ignore_empty_blocks=" << temp1 << "\n" +- << "hashdb: hashdb_import_sector_size= " << hashdb_import_sector_size << "\n" +- << "hashdb: hashdb_import_repository_name= " << hashdb_import_repository_name << "\n" +- << "hashdb: hashdb_import_max_duplicates=" << hashdb_import_max_duplicates << "\n" ++ << "hashdb: hashdb_step_size= " << hashdb_step_size << "\n" ++ << "hashdb: hashdb_repository_name= " << hashdb_repository_name << "\n" + << "hashdb: Creating hashdb directory " << hashdb_dir << "\n"; ++ ++ // open hashdb for importing ++ // currently, hashdb_dir is required to not exist ++ hashdb::settings_t settings; ++ settings.block_size = hashdb_block_size; ++ std::string error_message = hashdb::create_hashdb(hashdb_dir, settings, ""); ++ if (error_message.size() != 0) { ++ std::cerr << "Error: " << error_message << "\n"; ++ exit(1); ++ } ++ import_manager = new hashdb::import_manager_t(hashdb_dir, ""); + return; + } + + case MODE_SCAN: { +- // show relavent settable options +- std::string temp2((hashdb_ignore_empty_blocks) ? "YES" : "NO"); ++ // show relevant settable options + std::cout << "hashdb: hashdb_mode=" << hashdb_mode << "\n" + << "hashdb: hashdb_block_size=" << hashdb_block_size << "\n" +- << "hashdb: hashdb_ignore_empty_blocks=" << temp2 << "\n" +- << "hashdb: hashdb_scan_path_or_socket=" << hashdb_scan_path_or_socket << "\n" +- << "hashdb: hashdb_scan_sector_size=" << hashdb_scan_sector_size << "\n"; ++ << "hashdb: hashdb_step_size= " << hashdb_step_size << "\n" ++ << "hashdb: hashdb_scan_path=" << hashdb_scan_path << "\n" ++ << "hashdb: hashdb_max_feature_file_lines=" << hashdb_max_feature_file_lines ++ << "\n"; ++ ++ // open hashdb for scanning ++ scan_manager = new hashdb::scan_manager_t(hashdb_scan_path); ++ ++ // set the feature recorder to leave context alone but fix invalid utf8 ++ sp.fs.get_name("identified_blocks")->set_flag(feature_recorder::FLAG_XML); ++#ifdef DEBUG_V2_OUT ++ sp.fs.get_name("identified_blocks2")->set_flag(feature_recorder::FLAG_XML); ++#endif + +- // open the hashdb manager for scanning +- hashdb = new hashdb_t(hashdb_scan_path_or_socket); + return; + } + + case MODE_NONE: { +- // show relavent settable options ++ // show relevant settable options + std::cout << "hashdb: hashdb_mode=" << hashdb_mode << "\n" + << "WARNING: the hashdb scanner is enabled but it will not perform any action\n" + << "because no mode has been selected. Please either select a hashdb mode or\n" +@@ -285,7 +353,7 @@ + case scanner_params::PHASE_SCAN: { + switch(mode) { + case MODE_IMPORT: +- do_import(sp, rcb); ++ do_import(sp, rcb); + return; + + case MODE_SCAN: +@@ -301,14 +369,17 @@ + // shutdown + case scanner_params::PHASE_SHUTDOWN: { + switch(mode) { +- case MODE_SCAN: +- delete hashdb; +- return; + case MODE_IMPORT: +- delete hashdb; +- return; ++ delete import_manager; ++ return; ++ ++ case MODE_SCAN: ++ delete scan_manager; ++ return; + default: +- return; ++ // the user should have just left the scanner disabled. ++ // no action. 
++ return; + } + } + +@@ -327,170 +398,154 @@ + // get the sbuf + const sbuf_t& sbuf = sp.sbuf; + +- // there should be at least one block to process +- if (sbuf.pagesize < hashdb_block_size) { +- return; +- } +- +- // get count of blocks to process +- size_t count = sbuf.bufsize / hashdb_import_sector_size; +- while ((count * hashdb_import_sector_size) + +- (hashdb_block_size - hashdb_import_sector_size) > sbuf.pagesize) { +- --count; +- } +- +- // allocate space on heap for import_input +- std::vector* import_input = +- new std::vector; ++ // get the filename from sbuf without the sbuf map file delimiter ++ std::string path_without_map_file_delimiter = ++ (sbuf.pos0.path.size() > 4) ? ++ std::string(sbuf.pos0.path, 0, sbuf.pos0.path.size() - 4) : ""; ++ ++ // get the filename to use as the source filename ++ std::stringstream ss; ++ const size_t p=sbuf.pos0.path.find('/'); ++ if (p==std::string::npos) { ++ // no directory in forensic path so explicitly include the filename ++ ss << sp.fs.get_input_fname(); ++ if (sbuf.pos0.isRecursive()) { ++ // forensic path is recursive so add "/" + forensic path ++ ss << "/" << path_without_map_file_delimiter; ++ } ++ } else { ++ // directory in forensic path so print forensic path as is ++ ss << path_without_map_file_delimiter; ++ } ++ std::string source_filename = ss.str(); ++ ++ // calculate the file hash using the sbuf page ++ const md5_t sbuf_hash = hash_generator::hash_buf(sbuf.buf, sbuf.pagesize); ++ const std::string file_binary_hash = ++ std::string(reinterpret_cast(sbuf_hash.digest), 16); ++ ++ // track count values ++ size_t zero_count = 0; ++ size_t nonprobative_count = 0; + +- // import all the cryptograph hash values from all the blocks in sbuf +- for (size_t i=0; i < count; ++i) { ++ // import the cryptograph hash values from all the blocks in sbuf ++ for (size_t offset=0; offset(hash.digest), 16); ++ ++ // put together any block classification labels ++ // set flags based on specific tests on the block ++ // Construct an sbuf from the block and subject it to the other tests ++ const sbuf_t s(sbuf, offset, hashdb_block_size); ++ std::stringstream ss_flags; ++ if (ramp_trait(s)) ss_flags << "R"; ++ if (hist_trait(s)) ss_flags << "H"; ++ if (whitespace_trait(s)) ss_flags << "W"; ++ if (monotonic_trait(s)) ss_flags << "M"; ++ ++ // NOTE: shannon16 is Disabled because its results were not useful ++ // and because it needs fixed to not generate sbuf read exception. 
++ //if (ss_flags.str().size() > 0) ss_flags << "," << shannon16(s); ++ ++ // flags means nonprobative ++ if (ss_flags.str().size() > 0) { ++ ++nonprobative_count; + } + +- // calculate the offset from the start of the media image +- uint64_t image_offset = sbuf.pos0.offset + offset; +- +- // create and add the import element to the import input +- import_input->push_back(hashdb_t::import_element_t( +- hash, +- hashdb_import_repository_name, +- ss.str(), +- image_offset)); +- } +- +- // perform the import +- int status = hashdb->import(*import_input); +- +- if (status != 0) { +- std::cerr << "scan_hashdb import failure\n"; +- } +- +- // clean up +- delete import_input; ++ // import the hash ++ import_manager->insert_hash(binary_hash, ++ 0, // entropy ++ ss_flags.str(), ++ file_binary_hash); ++ } ++ ++ // insert the source name pair ++ import_manager->insert_source_name(file_binary_hash, ++ hashdb_repository_name, source_filename); ++ ++ // insert the source data ++ import_manager->insert_source_data(file_binary_hash, ++ sbuf.pagesize, ++ "", // file type ++ zero_count, ++ nonprobative_count); + } + + // perform scan + static void do_scan(const class scanner_params &sp, + const recursion_control_block &rcb) { + ++ // get the feature recorder ++ feature_recorder* identified_blocks_recorder = sp.fs.get_name("identified_blocks"); ++#ifdef DEBUG_V2_OUT ++ feature_recorder* identified_blocks_recorder2 = sp.fs.get_name("identified_blocks2"); ++#endif ++ + // get the sbuf + const sbuf_t& sbuf = sp.sbuf; + +- // there should be at least one block to process +- if (sbuf.pagesize < hashdb_block_size) { +- return; +- } ++ // process cryptographic hash values for blocks along sector boundaries ++ for (size_t offset=0; offset sbuf.pagesize) { +- --count; +- } +- +- // allocate space on heap for scan_input +- std::vector* scan_input = new std::vector; +- +- // allocate space on heap for the offset lookup table +- std::vector* offset_lookup_table = new std::vector; +- +- // get the cryptograph hash values of all the blocks along +- // sector boundaries from sbuf +- for (size_t i=0; i 0 && identified_blocks_recorder->count() >= ++ hashdb_max_feature_file_lines) { ++ break; ++ } + +- // calculate the offset associated with this index +- size_t offset = i * hashdb_scan_sector_size; ++ // Create a child sbuf of the block ++ const sbuf_t sbuf_to_hash(sbuf, offset, hashdb_block_size); + + // ignore empty blocks +- if (hashdb_ignore_empty_blocks && is_empty_block(sbuf.buf + offset)) { ++ if (empty_sbuf(sbuf_to_hash)){ + continue; + } + +- // add the offset to the offset lookup table +- offset_lookup_table->push_back(offset); +- +- // calculate and add the hash to the scan input +- scan_input->push_back(hash_generator::hash_buf( +- sbuf.buf + offset, hashdb_block_size)); +- } +- +- // allocate space on heap for scan_output +- hashdb_t::scan_output_t* scan_output = new hashdb_t::scan_output_t; +- +- // perform the scan +- int status = hashdb->scan(*scan_input, *scan_output); +- +- if (status != 0) { +- std::cerr << "Error: scan_hashdb scan failure. 
Aborting.\n"; +- exit(1); +- } +- +- // get the feature recorder +- feature_recorder* identified_blocks_recorder = sp.fs.get_name("identified_blocks"); ++ // calculate the hash for this sector-aligned hash block ++ const md5_t hash = hash_one_block(sbuf_to_hash); ++ const std::string binary_hash = ++ std::string(reinterpret_cast(hash.digest), 16); ++ ++ // scan for the hash ++ std::string json_text = scan_manager->find_hash_json( ++ hashdb::scan_mode_t::EXPANDED_OPTIMIZED, binary_hash); ++ ++ if (json_text.size() == 0) { ++ // hash not found ++ continue; ++ } + +- // record each feature returned in the response +- for (hashdb_t::scan_output_t::const_iterator it=scan_output->begin(); it!= scan_output->end(); ++it) { ++ // prepare fields to record the feature + +- // prepare forensic path (pos0, feature, context) +- // as (pos0, hash_string, count_string) ++ // get hash_string from hash ++ std::string hash_string = hash.hexdigest(); + +- // pos0 +- pos0_t pos0 = sbuf.pos0 + offset_lookup_table->at(it->first); ++ // record the feature, there is no context field ++ identified_blocks_recorder->write(sbuf.pos0+offset, hash_string, json_text); + +- // hash_string +- std::string hash_string = scan_input->at(it->first).hexdigest(); ++#ifdef DEBUG_V2_OUT ++ size_t count = scan_manager->find_hash_count(binary_hash); + +- // count ++ // build context field + std::stringstream ss; +- ss << it->second; +- std::string count_string = ss.str(); ++ ss << "{\"count\":" << count << "}"; + + // record the feature +- identified_blocks_recorder->write(pos0, hash_string, count_string); +- } +- +- // clean up +- delete scan_input; +- delete offset_lookup_table; +- delete scan_output; +-} ++ identified_blocks_recorder2->write(sbuf.pos0+offset, hash_string, ss.str()); ++#endif + +-// detect if block is empty +-inline bool is_empty_block(const uint8_t *buf) { +- for (size_t i=1; i parts = split(s,'/'); +- if(parts.size()!=2) return stod(s); // no slash, so return without +- double top = stod(parts[0]); +- double bot = stod(parts[1]); ++ if(parts.size()!=2) return sub_stod(s); // no slash, so return without ++ double top = sub_stod(parts[0]); ++ double bot = sub_stod(parts[1]); + return bot>0 ? 
top / bot : top; + } diff --git a/app-forensics/bulk_extractor/files/other_minor_fixes.patch b/app-forensics/bulk_extractor/files/other_minor_fixes.patch new file mode 100644 index 000000000..a485cbb67 --- /dev/null +++ b/app-forensics/bulk_extractor/files/other_minor_fixes.patch @@ -0,0 +1,60 @@ +diff -ur a/configure.ac b/configure.ac +--- a/configure.ac 2014-09-16 23:08:06.000000000 +0400 ++++ b/configure.ac 2019-03-17 10:08:12.594871130 +0300 +@@ -150,7 +150,7 @@ + AC_ARG_ENABLE([flexscanners], + AS_HELP_STRING([--disable-flexscanners], [disable FLEX-based scanners]), + [], +- [AC_DEFINE(FLEXSCANNERS_ENABLED, 1, [Use FLEX-based scanners]), flexscanners='yes']) ++ [AC_DEFINE(FLEXSCANNERS_ENABLED, 1, [Use FLEX-based scanners]) flexscanners='yes']) + AM_CONDITIONAL([FLEXSCANNERS_ENABLED], [test "yes" = "$flexscanners"]) + + +diff -ur a/m4/ax_boost_base.m4 b/m4/ax_boost_base.m4 +--- a/m4/ax_boost_base.m4 2014-09-16 22:34:00.000000000 +0400 ++++ b/m4/ax_boost_base.m4 2019-03-17 10:12:31.849532373 +0300 +@@ -107,7 +107,7 @@ + dnl this location ist chosen if boost libraries are installed with the --layout=system option + dnl or if you install boost with RPM + if test "$ac_boost_path" != ""; then +- BOOST_CPPFLAGS="-isystem$ac_boost_path/include" ++ BOOST_CPPFLAGS="-I$ac_boost_path/include" + for ac_boost_path_tmp in $libsubdirs; do + if test -d "$ac_boost_path"/"$ac_boost_path_tmp" ; then + BOOST_LDFLAGS="-L$ac_boost_path/$ac_boost_path_tmp" +@@ -126,7 +126,7 @@ + if ls "$ac_boost_path_tmp/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi + done + BOOST_LDFLAGS="-L$ac_boost_path_tmp/$libsubdir" +- BOOST_CPPFLAGS="-isystem$ac_boost_path_tmp/include" ++ BOOST_CPPFLAGS="-I$ac_boost_path_tmp/include" + break; + fi + done +@@ -179,7 +179,7 @@ + _version=$_version_tmp + fi + VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` +- BOOST_CPPFLAGS="-isystem$ac_boost_path/include/boost-$VERSION_UNDERSCORE" ++ BOOST_CPPFLAGS="-I$ac_boost_path/include/boost-$VERSION_UNDERSCORE" + done + fi + else +@@ -202,7 +202,7 @@ + done + + VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` +- BOOST_CPPFLAGS="-isystem$best_path/include/boost-$VERSION_UNDERSCORE" ++ BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE" + if test "$ac_boost_lib_path" = ""; then + for libsubdir in $libsubdirs ; do + if ls "$best_path/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi +@@ -221,7 +221,7 @@ + V_CHECK=`expr $stage_version_shorten \>\= $_version` + if test "$V_CHECK" = "1" -a "$ac_boost_lib_path" = "" ; then + AC_MSG_NOTICE(We will use a staged boost library from $BOOST_ROOT) +- BOOST_CPPFLAGS="-isystem$BOOST_ROOT" ++ BOOST_CPPFLAGS="-I$BOOST_ROOT" + BOOST_LDFLAGS="-L$BOOST_ROOT/stage/$libsubdir" + fi + fi diff --git a/app-forensics/bulk_extractor/files/update_to_match_hashdb_source_interface_change.patch b/app-forensics/bulk_extractor/files/update_to_match_hashdb_source_interface_change.patch new file mode 100644 index 000000000..26702a828 --- /dev/null +++ b/app-forensics/bulk_extractor/files/update_to_match_hashdb_source_interface_change.patch @@ -0,0 +1,105 @@ +diff -ur a/src/scan_hashdb.cpp b/src/scan_hashdb.cpp +--- a/src/scan_hashdb.cpp 2019-03-16 18:29:36.077818000 +0300 ++++ b/src/scan_hashdb.cpp 2019-03-16 14:07:05.887464616 +0300 +@@ -38,12 +38,11 @@ + + #include + #include +-#include // for getpid +-#include // for getpid ++#include // for getpid ++#include // for getpid + + // user settings + static std::string hashdb_mode="none"; // import or scan +-static uint32_t hashdb_byte_alignment=512; 
// import only
+ static uint32_t hashdb_block_size=512;        // import or scan
+ static uint32_t hashdb_step_size=512;         // import or scan
+ static std::string hashdb_scan_path="your_hashdb_directory";  // scan only
+@@ -198,14 +197,6 @@
+       << "      scan - Scan for matching block hashes.";
+   sp.info->get_config("hashdb_mode", &hashdb_mode, ss_hashdb_mode.str());
+ 
+-  // hashdb_byte_alignment
+-  std::stringstream ss_hashdb_byte_alignment;
+-  ss_hashdb_byte_alignment
+-      << "Selects the byte alignment to use in the new import\n"
+-      << "      database.";
+-  sp.info->get_config("hashdb_byte_alignment", &hashdb_byte_alignment,
+-                      ss_hashdb_byte_alignment.str());
+-
+   // hashdb_block_size
+   sp.info->get_config("hashdb_block_size", &hashdb_block_size,
+                 "Selects the block size to hash, in bytes.");
+@@ -276,13 +267,6 @@
+       exit(1);
+   }
+ 
+-  // hashdb_byte_alignment
+-  if (hashdb_byte_alignment == 0) {
+-    std::cerr << "Error. Value for parameter 'hashdb_byte_alignment' is invalid.\n"
+-              << "Cannot continue.\n";
+-    exit(1);
+-  }
+-
+   // hashdb_block_size
+   if (hashdb_block_size == 0) {
+     std::cerr << "Error. Value for parameter 'hashdb_block_size' is invalid.\n"
+@@ -297,16 +281,6 @@
+       exit(1);
+   }
+ 
+-  // for valid operation, scan sectors must align on byte aligned boundaries
+-  if (hashdb_step_size % hashdb_byte_alignment != 0) {
+-    std::cerr << "Error: invalid byte alignment=" << hashdb_byte_alignment
+-              << " for step size=" << hashdb_step_size << ".\n"
+-              << "Steps must fit along byte alignment boundaries.\n"
+-              << "Specifically, hashdb_step_size \% hashdb_byte_alignment must be zero.\n"
+-              << "Cannot continue.\n";
+-    exit(1);
+-  }
+-
+   // indicate hashdb version
+   std::cout << "hashdb: hashdb_version=" << hashdb_version() << "\n";
+ 
+@@ -318,7 +292,6 @@
+ 
+       // show relevant settable options
+       std::cout << "hashdb: hashdb_mode=" << hashdb_mode << "\n"
+-                << "hashdb: hashdb_byte_alignment= " << hashdb_byte_alignment << "\n"
+                 << "hashdb: hashdb_block_size=" << hashdb_block_size << "\n"
+                 << "hashdb: hashdb_step_size= " << hashdb_step_size << "\n"
+                 << "hashdb: hashdb_repository_name= " << hashdb_repository_name << "\n"
+@@ -327,7 +300,6 @@
+       // open hashdb for importing
+       // currently, hashdb_dir is required to not exist
+       hashdb::settings_t settings;
+-      settings.byte_alignment = hashdb_byte_alignment;
+       settings.block_size = hashdb_block_size;
+       std::string error_message = hashdb::create_hashdb(hashdb_dir, settings, "");
+       if (error_message.size() != 0) {
+@@ -472,9 +444,6 @@
+     const md5_t hash = hash_one_block(sbuf_to_hash);
+     const std::string binary_hash(reinterpret_cast<const char*>(hash.digest), 16);
+ 
+-    // calculate the offset from the start of the media image
+-    const uint64_t image_offset = sbuf_to_hash.pos0.offset;
+-
+     // put together any block classification labels
+     // set flags based on specific tests on the block
+     // Construct an sbuf from the block and subject it to the other tests
+@@ -498,8 +467,7 @@
+     import_manager->insert_hash(binary_hash,
+                                 0,  // entropy
+                                 ss_flags.str(),
+-                                file_binary_hash,
+-                                image_offset);
++                                file_binary_hash);
+   }
+ 
+   // insert the source name pair
+@@ -581,4 +549,3 @@
+ }
+ 
+ #endif
+-
diff --git a/app-forensics/bulk_extractor/metadata.xml b/app-forensics/bulk_extractor/metadata.xml
new file mode 100644
index 000000000..4cd9358a1
--- /dev/null
+++ b/app-forensics/bulk_extractor/metadata.xml
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE pkgmetadata SYSTEM "https://www.gentoo.org/dtd/metadata.dtd">
+<pkgmetadata>
+	<maintainer type="person">
+		<email>email@linxon.ru</email>
+		<name>Yury Martynov</name>
+	</maintainer>
+	<maintainer type="project">
+		<email>proxy-maint@gentoo.org</email>
+		<name>Proxy Maintainers</name>
+	</maintainer>
+	<use>
+		<flag name="aff">Add <pkg>app-forensics/afflib</pkg> support</flag>
+		<flag name="beviewer">Enable BEViewer, a user interface for browsing features</flag>
+		<flag name="exiv2">Add <pkg>media-gfx/exiv2</pkg> support</flag>
+		<flag name="ewf">Add <pkg>app-forensics/libewf</pkg> support</flag>
+		<flag name="hashdb">Enable the hashdb scanner</flag>
+		<flag name="rar">Enable RAR decompression</flag>
+	</use>
+</pkgmetadata>
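
Note on the `$(use flag || echo --disable-foo)` pattern used in src_configure above:
econf only receives an option when the `use` test fails, so an unset USE flag
selects the explicit --disable-* switch while a set flag emits nothing and leaves
the ./configure default in place. A minimal standalone sketch follows; the `use`
stub and the sample USE set are hypothetical, since inside Portage `use` is
provided by the package manager:

	#!/bin/bash
	# Sketch only: emulate Portage's `use` helper outside an ebuild.
	USE="aff hashdb"                        # hypothetical USE set for this demo
	use() { [[ " ${USE} " == *" $1 "* ]]; }

	# Mirror of the conditional option pattern from src_configure:
	myconf=(
		--without-o3
		$(use aff      || echo "--disable-afflib")
		$(use beviewer || echo "--disable-BEViewer")
		$(use exiv2    && echo "--enable-exiv2")
		$(use hashdb   || echo "--disable-hashdb")
	)
	echo "./configure ${myconf[*]}"
	# Prints: ./configure --without-o3 --disable-BEViewer

An empty command substitution expands to no word at all, so disabled branches
add nothing to the argument list.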