include scripts to update etc list

This commit is contained in:
Nicholas Long 2022-07-06 04:12:15 +00:00
parent 3c181f15fe
commit 3363526e3d
8 changed files with 144 additions and 0 deletions

View file

@ -0,0 +1,14 @@
# etc file list updater
## overview
These scripts scan deb packages from the Ubuntu apt repository and append any `/etc/` file paths that are not yet listed to the `LFI-coyote0x90-linux-etc-files.txt` wordlist.
## running
The script must be run from its own directory, because it refers to its helper scripts and data files by relative path.
```bash
cd .github/scripts/etc-files-list-update && ./update.sh
```
## details
URLs of deb files that have already been scanned are stored gzip-compressed in the `deb-url-history/` directory, one file per year.
The Ubuntu release whose packages are retrieved is named in the file `current_distro`. This should be changed every few years.

View file

@ -0,0 +1 @@
kinetic

View file

@ -0,0 +1,56 @@
#!/bin/bash
# Scan Ubuntu packages that have not been scanned before and append every
# /etc/ path they install, and that is not yet listed, to the wordlist.
#
# Must be run from this script's own directory: the helper scripts, the
# URL history and the wordlist are all referenced by relative path.
# Progress messages go to stderr.
export listpath="../../../Fuzzing/LFI/LFI-coyote0x90-linux-etc-files.txt"

# Read file paths on stdin and print, once each, every path that starts
# with /etc/ and is not already a line of the list file.
# Arguments: $1 - path to the existing list file
filter_new_etc_files() {
  listpath="$1" awk '
    BEGIN {
      lp = ENVIRON["listpath"]
      # getline returns -1 when the file cannot be read; testing "> 0"
      # ends the loop in that case instead of spinning forever
      while ((getline line < lp) > 0) {
        seen[line] = 1
      }
      close(lp)
    }
    # the post-increment marks the path as seen, so a path shipped by
    # several packages is only printed the first time
    /^\/etc\// && !seen[$0]++ { print }
  '
}

# Run one full update: find new URLs, scan them, merge the results into
# the wordlist and record the scanned URLs in the history.
main() {
  local year url

  rm -f all_files.gz
  # every year, start a new gzip list so there is not as much bloat
  # updating blobs in git
  year=$(date +%Y)

  echo "finding URLs" 1>&2
  # get new URLs
  util/find-new-urls.awk > url_batch
  # stop if there are no new URLs to scan
  if [[ ! -s url_batch ]]; then
    echo "no new URLs" 1>&2
    rm -f url_batch
    return 0
  fi

  # scan them, one URL per line
  while IFS= read -r url; do
    [[ -n "$url" ]] || continue
    echo "scanning $url" 1>&2
    # stdin is detached so the helper cannot swallow the rest of the batch
    util/scan-package.sh "$url" < /dev/null | gzip >> all_files.gz
  done < url_batch

  echo "searching for etc files" 1>&2
  zcat all_files.gz | filter_new_etc_files "$listpath" > updated_etc_files

  echo "updating list" 1>&2
  # concatenate the existing list and the new entries, then swap it in
  cat "$listpath" updated_etc_files > updated_file
  mv updated_file "$listpath"

  # save progress
  gzip < url_batch >> "deb-url-history/$year.gz"

  # cleanup
  rm url_batch updated_etc_files all_files.gz
}

main "$@"

View file

@ -0,0 +1,14 @@
#!/usr/bin/awk -f
# Print every package URL reported by util/get-package-urls.sh that is not
# already reported by util/print-urls.sh, one URL per line.
# Both helpers are run by relative path, so this must be started from the
# directory that contains util/.
BEGIN {
    # load all the URLs we scanned already
    history_cmd = "util/print-urls.sh"
    # getline returns -1 on error; comparing with "> 0" ends the loop
    # instead of looping forever on a failed read
    while ((history_cmd | getline url) > 0) {
        scanned[url] = 1 # add to set
    }
    close(history_cmd)

    # print package URLs that do not appear in the history
    packages_cmd = "util/get-package-urls.sh"
    while ((packages_cmd | getline url) > 0) {
        if (!(url in scanned)) print url
    }
    close(packages_cmd)
}

View file

@ -0,0 +1,10 @@
#!/bin/bash
# Print the download URL of every amd64 package in the "main" component
# of the Ubuntu release named in the file current_distro, one per line.
# Must be run from the directory that contains current_distro.

# make a failed download or decompression visible in the exit status
set -o pipefail

# exported because the awk filter reads it through ENVIRON
export repo="http://archive.ubuntu.com/ubuntu"

# Read a Packages index on stdin and print the full URL for each
# "Filename:" field.
packages_to_urls() {
  awk '/^Filename: / { print ENVIRON["repo"] "/" $2 }'
}

# Download the Packages index for the current release and print its URLs.
main() {
  local dist
  if ! dist=$(cat current_distro) || [[ -z "$dist" ]]; then
    echo "current_distro is missing or empty" >&2
    return 1
  fi
  # -f: treat HTTP errors as failures instead of piping an error page
  #     into gzip; -sS: no progress meter, but still report errors
  curl -fsS "$repo/dists/$dist/main/binary-amd64/Packages.gz" \
    | gzip -d \
    | packages_to_urls
}

main "$@"

View file

@ -0,0 +1,7 @@
#!/bin/bash
# Print every URL in every gzip file in the deb-url-history directory.
# Must be run from the directory that contains deb-url-history/.

# Decompress each history file to stdout, in glob (sorted) order.
print_urls() {
  local f
  # glob instead of parsing ls, so file names with whitespace stay intact
  for f in deb-url-history/*; do
    # an unmatched glob stays literal; skip it and anything not a file
    [[ -f "$f" ]] || continue
    zcat "$f"
  done
}

print_urls

View file

@ -0,0 +1,42 @@
#!/bin/bash
# Download one .deb package and print the absolute path of every file
# listed in the md5sums file of its control archive, one path per line.
#
# Usage: scan-package.sh URL
# Needs wget, ar and tar, plus xz, zstd or gzip depending on the package.
# A package that cannot be processed is reported on stderr and skipped
# with exit status 0, so a batch run is not interrupted by one bad file.

# scratch directory of the current run; removed by the EXIT trap
scan_tmpdir=""

# Remove the scratch directory, whichever way the script ends.
cleanup() {
  if [[ -n "$scan_tmpdir" ]]; then
    rm -rf -- "$scan_tmpdir"
  fi
}
trap cleanup EXIT

# Report a problem with the current package on stderr.
problem() {
  printf '%s\n' "$*" >&2
}

# Print a "/"-prefixed path for every line of an md5sums file.
# Arguments: $1 - md5sums file with "<checksum><blanks><path>" lines
print_md5sums_paths() {
  awk '
    {
      sub(/^[0-9a-zA-Z]*[ \t]+/, "")  # drop checksum and separator
      sub(/^\.\//, "")                # drop a leading "./"
      print "/" $0
    }
  ' "$1"
}

# Unpack the control archive found in the current directory.
# Returns: 0 on success, 2 if no known control archive exists, otherwise
#          the failure status of the extraction.
extract_control() {
  if [[ -f control.tar.xz ]]; then
    xz -dc control.tar.xz | tar -xf -
  elif [[ -f control.tar.zst ]]; then # need to install zstd
    zstd -dc control.tar.zst | tar -xf -
  elif [[ -f control.tar.gz ]]; then
    gzip -dc control.tar.gz | tar -xf -
  elif [[ -f control.tar ]]; then     # uncompressed control archive
    tar -xf control.tar
  else
    return 2
  fi
}

# Download the package at URL $1 and print the paths it installs.
scan_package() {
  local url=$1
  if [[ -z "$url" ]]; then
    problem "usage: ${0##*/} URL"
    return 2
  fi
  if ! scan_tmpdir=$(mktemp -d); then
    problem "cannot create temporary directory"
    return 1
  fi
  # subshell: the caller's working directory is left untouched
  (
    cd "$scan_tmpdir" || exit 1
    if ! wget -q -O output -- "$url"; then
      problem "$url download failed"
      exit 0
    fi
    # extracts data.tar.* and control.tar.*
    if ! ar -x output; then
      problem "$url is not a readable deb archive"
      exit 0
    fi
    extract_control
    case $? in
      0) ;;
      2)
        problem "$url unknown deb compression format"
        ls >&2
        exit 0
        ;;
      *)
        problem "$url control archive could not be unpacked"
        exit 0
        ;;
    esac
    if [[ ! -f md5sums ]]; then
      problem "$url has no md5sums file"
      exit 0
    fi
    print_md5sums_paths md5sums
  )
}

scan_package "$1"