diff --git a/.github/scripts/etc-files-list-update/README.md b/.github/scripts/etc-files-list-update/README.md
new file mode 100644
index 00000000..e5321fea
--- /dev/null
+++ b/.github/scripts/etc-files-list-update/README.md
@@ -0,0 +1,14 @@
+# etc file list updater
+
+## overview
+The purpose of this set of scripts is to scan deb packages from the ubuntu apt repository.
+
+## running
+The script must be run from its working directory.
+```bash
+cd .github/scripts/etc-files-list-update && ./update.sh
+```
+
+## details
+URLs for deb files that have already been scanned are stored in gzip format in the `deb-url-history/` directory.
+The current ubuntu distro for which packages are retrieved is stored in the file `current_distro`. This should be changed every few years.
diff --git a/.github/scripts/etc-files-list-update/current_distro b/.github/scripts/etc-files-list-update/current_distro
new file mode 100644
index 00000000..ac502373
--- /dev/null
+++ b/.github/scripts/etc-files-list-update/current_distro
@@ -0,0 +1 @@
+kinetic
diff --git a/.github/scripts/etc-files-list-update/deb-url-history/deb.urls.initial.gz b/.github/scripts/etc-files-list-update/deb-url-history/deb.urls.initial.gz
new file mode 100644
index 00000000..6d6aad89
Binary files /dev/null and b/.github/scripts/etc-files-list-update/deb-url-history/deb.urls.initial.gz differ
diff --git a/.github/scripts/etc-files-list-update/update.sh b/.github/scripts/etc-files-list-update/update.sh
new file mode 100755
--- /dev/null
+++ b/.github/scripts/etc-files-list-update/update.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+#
+# Scan the Ubuntu .deb packages that have not been scanned yet and append the
+# /etc/ paths they ship to the LFI wordlist. Run it from its own directory:
+# util/, deb-url-history/ and $listpath are all resolved relative to the cwd.
+
+# Stop at the first failed step so the wordlist and the URL history are never
+# rewritten from partial data; a failed pipeline stage counts as a failure.
+set -euo pipefail
+
+# exported because the awk program below reads it through ENVIRON
+export listpath="../../../Fuzzing/LFI/LFI-coyote0x90-linux-etc-files.txt"
+
+# the scan loop appends to this file, so drop leftovers of an aborted run
+rm -f all_files.gz
+
+# every year, start a new gzip list so there is not as much bloat updating blobs in git
+year=$(date +%Y)
+
+echo "finding URLs" 1>&2
+
+# get new URLs
+util/find-new-urls.awk > url_batch
+
+# exit if there's no new URLs to scan
+if [[ ! -s url_batch ]]
+then
+    echo "no new URLs" 1>&2
+    rm -f url_batch
+    exit 0
+fi
+
+# scan them
+# read line by line instead of word-splitting $(cat url_batch); the scanner
+# gets /dev/null as stdin so it cannot swallow the rest of the batch
+while IFS= read -r u
+do
+    echo "scanning $u" 1>&2
+    # one broken package must not abort the run: warn and carry on
+    util/scan-package.sh "$u" < /dev/null | gzip >> all_files.gz \
+        || echo "warning: scan failed for $u" 1>&2
+done < url_batch
+
+echo "searching for etc files" 1>&2
+
+# get all files matching /etc/
+# ignore repeat files already in the list
+zcat all_files.gz | awk '
+BEGIN {
+    lp = ENVIRON["listpath"]
+    # compare with "> 0": getline returns -1 when the list cannot be read,
+    # and a bare "while (getline < lp)" would then spin forever
+    while ((getline known < lp) > 0) {
+        seen[known] = 1
+    }
+    close(lp)
+}
+# the post-increment also drops paths repeated within this batch
+/^\/etc\// && !seen[$0]++ { print }
+' > updated_etc_files
+
+echo "updating list" 1>&2
+
+# concatenate the existing list and the output
+cat "$listpath" updated_etc_files > updated_file
+
+# update the list
+mv updated_file "$listpath"
+
+# save progress
+gzip < url_batch >> "deb-url-history/$year.gz"
+
+# cleanup
+rm url_batch updated_etc_files all_files.gz
diff --git a/.github/scripts/etc-files-list-update/util/find-new-urls.awk b/.github/scripts/etc-files-list-update/util/find-new-urls.awk
new file mode 100755
--- /dev/null
+++ b/.github/scripts/etc-files-list-update/util/find-new-urls.awk
@@ -0,0 +1,25 @@
+#!/usr/bin/awk -f
+#
+# Print every package URL reported by util/get-package-urls.sh that is not
+# already recorded by util/print-urls.sh. Both helpers are run through
+# relative paths, so start this from the etc-files-list-update directory.
+BEGIN {
+    # load all the URLs we scanned already
+    command = "util/print-urls.sh"
+    # compare with "> 0": getline returns -1 on error, and a bare
+    # "while (command | getline)" would then never terminate
+    while ((command | getline url) > 0) {
+        urls[url] = 1 # add to set
+    }
+    close(command)
+    # get package URLs that do not appear in the list
+    command = "util/get-package-urls.sh"
+    while ((command | getline url) > 0) {
+        if (!(url in urls)) print url
+    }
+    # a failed download must not be mistaken for "no new URLs"
+    if (close(command) != 0) {
+        print "find-new-urls: " command " failed" > "/dev/stderr"
+        exit 1
+    }
+}
diff --git a/.github/scripts/etc-files-list-update/util/get-package-urls.sh b/.github/scripts/etc-files-list-update/util/get-package-urls.sh
new file mode 100755
--- /dev/null
+++ b/.github/scripts/etc-files-list-update/util/get-package-urls.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Print the download URL of every amd64 package in the "main" component of
+# the distro named in the file current_distro, one URL per line.
+
+# a failed download has to fail the script instead of printing an empty list
+set -euo pipefail
+
+# get new package URLs
+# load the list of amd64 packages from ubuntu
+dist=$(< current_distro)
+# exported because the awk program below reads it through ENVIRON
+export repo="http://archive.ubuntu.com/ubuntu"
+
+# print URLs
+# -f turns HTTP errors into a non-zero exit, -sS hides the progress meter only
+curl -fsS "$repo/dists/$dist/main/binary-amd64/Packages.gz" \
+    | gzip -d \
+    | awk '/^Filename: / { print ENVIRON["repo"] "/" $2 }'
diff --git a/.github/scripts/etc-files-list-update/util/print-urls.sh b/.github/scripts/etc-files-list-update/util/print-urls.sh
new file mode 100755
--- /dev/null
+++ b/.github/scripts/etc-files-list-update/util/print-urls.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#
+# Print every URL stored in the gzip files of the deb-url-history directory.
+
+# with nullglob an empty directory yields no iterations instead of the
+# literal pattern "deb-url-history/*"
+shopt -s nullglob
+
+# print every url in every file in deb-url-history directory
+# (a glob instead of parsing ls keeps unusual file names intact)
+for f in deb-url-history/*
+do
+    zcat "$f"
+done
diff --git a/.github/scripts/etc-files-list-update/util/scan-package.sh b/.github/scripts/etc-files-list-update/util/scan-package.sh
new file mode 100755
--- /dev/null
+++ b/.github/scripts/etc-files-list-update/util/scan-package.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# Download one .deb and print the absolute path of every file listed in its
+# md5sums control file, one path per line.
+#
+# Arguments: $1 - URL of the .deb package
+# Outputs:   paths on stdout, diagnostics on stderr
+# Returns:   0 on success, also when there is no md5sums file or the control
+#            archive format is unknown; 1 when download or unpacking fails
+
+# let a failing decompressor fail the "decompress | tar" pipeline below
+set -o pipefail
+
+url=$1
+
+tf=$(mktemp -d) || exit 1
+# remove the scratch directory on every exit path, early exits included
+trap 'rm -rf -- "$tf"' EXIT
+cd "$tf" || exit 1
+
+if ! wget -q -O output -- "$url"; then
+    echo "$url download failed" 1>&2
+    exit 1
+fi
+
+# extracts data.tar.* and control.tar.* into the scratch directory
+if ! ar -x output; then
+    echo "$url is not a readable deb archive" 1>&2
+    exit 1
+fi
+
+# pick the decompressor that matches the control archive
+if [ -f control.tar.xz ]; then
+    decompress=(xz -dc control.tar.xz)
+elif [ -f control.tar.zst ]; then # need to install zstd
+    decompress=(zstd -dc control.tar.zst)
+elif [ -f control.tar.gz ]; then
+    decompress=(gzip -dc control.tar.gz)
+elif [ -f control.tar ]; then
+    decompress=(cat control.tar)
+else
+    # reported on stderr: a file written here would vanish with the directory
+    { echo "$url unknown deb compression format"; ls; } 1>&2
+    exit 0
+fi
+
+# extract control
+if ! "${decompress[@]}" | tar -xf -; then
+    echo "$url control archive could not be unpacked" 1>&2
+    exit 1
+fi
+
+# packages without an md5sums file have nothing to report
+[ -f md5sums ] || exit 0
+
+# print filenames
+# each md5sums line is "<md5>  <path>": drop the hash and the two-space
+# separator, then a leading "./" if present, and print the path as absolute
+awk '
+{
+    if (!sub(/^[0-9a-zA-Z]+  /, "")) next # not an md5sums entry
+    sub(/^\.\//, "")
+    print "/" $0
+}
+' md5sums