diff --git a/.bin/wordlist-updaters/sources.json b/.bin/wordlist-updaters/sources.json index ea0f45f7..1d68cf3b 100644 --- a/.bin/wordlist-updaters/sources.json +++ b/.bin/wordlist-updaters/sources.json @@ -5,6 +5,14 @@ "source": "https://raw.githubusercontent.com/wallarm/jwt-secrets/master/jwt.secrets.list", "output": "Passwords/scraped-JWT-secrets.txt", "post_run_script": "", - "frequency": "3h" + "frequency": "6h" + }, + { + "name": "Trickest wordlist update", + "type": "git_dir", + "source": "https://github.com/trickest/wordlists.git", + "output": ".working_space", + "post_run_script": ".bin/trickest-patcher.py", + "update_time": "1030" } ] \ No newline at end of file diff --git a/.bin/wordlist-updaters/status.json b/.bin/wordlist-updaters/status.json index 673295cc..3bf20208 100644 --- a/.bin/wordlist-updaters/status.json +++ b/.bin/wordlist-updaters/status.json @@ -1,6 +1,8 @@ { "Jwt secrets update": { - "last_update" : 0 + "last_update": 1712309841 + }, + "Trickest wordlist update": { + "last_update": 1712310048 } -} - \ No newline at end of file +} \ No newline at end of file diff --git a/.bin/wordlist-updaters/updater.py b/.bin/wordlist-updaters/updater.py index a3650e01..1f1e8c8c 100755 --- a/.bin/wordlist-updaters/updater.py +++ b/.bin/wordlist-updaters/updater.py @@ -1,18 +1,21 @@ #!/usr/bin/env python3 -import json import os -from datetime import datetime, timedelta import re +import json import requests +import subprocess +from datetime import datetime, timedelta # TODO Summary file +# TODO Advanced crontab syntax BASE_PATH = ".bin/wordlist-updaters" SOURCE_PATH = os.path.join(BASE_PATH, "sources.json") STATUS_PATH = os.path.join(BASE_PATH, "status.json") FREQUENCY_REGEX = r"^(?:([0-9]+)d|())(?:([0-9]+)h|())(?!.*?d)$" VALID_TYPES = ["file", "git_dir"] +TIME_NOW = datetime.now() def request_wrapper(url): @@ -44,26 +47,18 @@ to_check = [] for source in SOURCES: task_name = source["name"] + source_keys = source.keys() if not task_name in STATUS.keys(): print(f"[+] Queuing task {task_name} as task was never checked before") to_check.append(source) continue - if not "last_update" in STATUS[task_name].keys() or not isinstance(STATUS[task_name]["last_update"], int): - print(f"[!] Queuing task {task_name} as last_update field is missing/invalid") - to_check.append(source) - continue - - if not "frequency" in source.keys() or not isinstance(source["frequency"], str): - print(f"[!] Skipping task {task_name} as frequency field is missing/invalid") - continue - - if not "output" in source.keys() or not isinstance(source["output"], str): + if not "output" in source_keys or not isinstance(source["output"], str): print(f"[!] Skipping task {task_name} as output field is missing/invalid") continue - if not "type" in source.keys() or not isinstance(source["type"], str): + if not "type" in source_keys or not isinstance(source["type"], str): print(f"[!] Skipping task {task_name} as type field is missing/invalid") continue @@ -75,32 +70,74 @@ for source in SOURCES: print(f"[!] Skipping task {task_name} as output path is not relative.") continue - regex_match = re.search(FREQUENCY_REGEX, source["frequency"]) - - if not regex_match: - print(f"[!] Skipping task {task_name} as frequency field contains invalid formatting of days and hours") + if source["type"].startswith("git_") and not source["source"].endswith(".git"): + print(f"[!] Skipping task {task_name} as a git task was defined with a non git url.") continue - days, _, hours, _ = regex_match.groups() + if not "last_update" in STATUS[task_name].keys() or not isinstance(STATUS[task_name]["last_update"], int): + print(f"[!] Queuing task {task_name} as last_update field is missing/invalid") + to_check.append(source) + continue - days = bool(days) | 0 - hours = bool(hours) | 0 + if not ("frequency" in source_keys) ^ ("update_time" in source_keys): + print(f"[!] Skipping task {task_name} as only frequency or update_time can be specified") + continue - next_update_time = datetime.fromtimestamp(STATUS[task_name]["last_update"]) + timedelta(days=days, hours=hours) - time_now = datetime.now() - time_from_update = time_now - next_update_time + if "frequency" in source_keys and isinstance(source["frequency"], str): + regex_match = re.search(FREQUENCY_REGEX, source["frequency"]) - if time_now < next_update_time: - if time_from_update.seconds <= 300: - print(f"[+] Queuing task {task_name} as it is less than 5 minutes to update. ({time_from_update.seconds} seconds to update)") + if not regex_match: + print(f"[!] Skipping task {task_name} as frequency field contains invalid formatting of days and hours") + continue + + days, _, hours, _ = regex_match.groups() + + days = bool(days) | 0 + hours = bool(hours) | 0 + + next_update_time = datetime.fromtimestamp(STATUS[task_name]["last_update"]) + timedelta(days=days, hours=hours) + time_from_update = TIME_NOW - next_update_time + time_to_update = next_update_time - TIME_NOW + + if TIME_NOW < next_update_time: + if time_to_update.seconds <= 300: + print(f"[+] Queuing task {task_name} as it is less than 5 minutes to update. ({time_to_update.seconds} seconds to update)") + to_check.append(source) + continue + + print(f"[!] Skipping task {task_name} as it is more than 5 minutes to update ({time_to_update.seconds} seconds to update)") + continue + + print(f"[+] Queuing task {task_name} as it is {time_to_update.seconds} seconds after scheduled update time.") + to_check.append(source) + + elif "update_time" in source_keys and isinstance(source["update_time"], str): + update_time = source["update_time"] + + if len(update_time) != 4 and update_time.isnumeric(): + print(f"[!] Skipping task {task_name} as it is in a incorrect format") + continue + + hours = int(update_time[:2]) + minutes = int(update_time[2:]) + + if not hours in range(1, 25): + print(f"[!] Skipping task {task_name} as hours is not in range 1-24.") + continue + + if not minutes in range(1, 61): + print(f"[!] Skipping task {task_name} as minutes is not in range 1-60.") + continue + + scheduled_update_time = TIME_NOW.replace(hour=hours, minute=minutes) + if TIME_NOW <= scheduled_update_time and TIME_NOW + timedelta(hours=1) >= scheduled_update_time: + print(f"[+] Queuing task {task_name} as update time is within the next hour") to_check.append(source) continue - print(f"[!] Skipping task {task_name} as it is more than 5 minutes to update ({time_from_update.seconds} seconds to update)") + else: + print(f"[!] Skipping task {task_name} as update_time field is invalid") continue - - print(f"[+] Queuing task {task_name} as it is {time_from_update.seconds} seconds after scheduled update time.") - to_check.append(source) if len(to_check) == 0: print(f"[!] No task were queued. Exiting.") @@ -110,10 +147,32 @@ print(f"[+] Queued a total of {len(to_check)} tasks to run.") for task in to_check: print(f"[+] Starting task {task['name']}") + + if not task["name"] in STATUS.keys(): + STATUS[task["name"]] = {} - if task["type"] == "file": + task_type = task["type"] + + if task_type == "file": content = request_wrapper(task["source"]) open(task["output"], "w").write(content) - print(f"Saved file to output location") + print(f"[+] Saved file to output location") + + STATUS[task["name"]]["last_update"] = int(datetime.now().timestamp()) + + elif task_type == "git_dir": + if not os.path.exists(task['output']): + print(f"[+] Making directory {task['output']}") + os.makedirs(task["output"]) + + subprocess.run(["git", "clone", "-q", "--depth=1", task["source"]], cwd=task["output"]) + STATUS[task["name"]]["last_update"] = int(datetime.now().timestamp()) + + if task["post_run_script"]: + print("[+] Running post run script") + subprocess.run(task["post_run_script"]) + print("[+] Finished running post run script") + print(f"[+] Finished task {task['name']}") - \ No newline at end of file + +json.dump(STATUS, open(STATUS_PATH, "w"), indent=4) \ No newline at end of file