mirror of https://github.com/danielmiessler/SecLists (synced 2026-01-02 22:26:52 +01:00)
Create file-exts-downloader.py
This commit is contained in:
parent cb188b97f8
commit 2a85579583
1 changed file with 119 additions and 0 deletions
.bin/file-exts-downloader.py (Normal file, +119)
@@ -0,0 +1,119 @@
#!/usr/bin/python3

# Open a pull request if any sources are missing/need to be added
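# Third-party dependencies: requests and beautifulsoup4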

from bs4 import BeautifulSoup
import requests
import time
import re
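# Pages scraped for extension lists; the hard-coded selectors below assume
# these pages' current HTML layouts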
MICROSOFT_URL = "https://support.microsoft.com/en-us/windows/common-file-name-extensions-in-windows-da4a4430-8e76-89c5-59f7-1cdbbc75cb01"
WIKI_ROOT = "https://en.wikipedia.org"
WIKI_URL = "https://en.wikipedia.org/wiki/List_of_filename_extensions"
GFG_URL = "https://www.geeksforgeeks.org/list-of-file-formats/"
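# Accumulators: linked Wikipedia list pages, collected extensions, raw tables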
wiki_urls = []
exts = []
tables = []
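# Fetch the Wikipedia index page and collect the links to the individual
# extension-list pages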
res = requests.get(WIKI_URL).text
wiki_soup = BeautifulSoup(res, "html.parser")

for i in wiki_soup.find_all("div", {"class": "mw-content-ltr mw-parser-output"})[0].find_all("ul")[2].find_all("li"):
    wiki_urls.append(WIKI_ROOT + i.a.get("href"))
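# Download each linked page and gather every <table>, sleeping briefly
# between requests to be polite to the server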
for i in wiki_urls:
    res = requests.get(i).text
    wiki_soup = BeautifulSoup(res, "html.parser")

    tables += wiki_soup.find_all("table")
    time.sleep(0.5)
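# Keep only tables classed "wikitable" and pull the extension out of each
# row's first cell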
for i in tables:

    # Some tables carry no class attribute, so use .get() to avoid a KeyError
    if "wikitable" not in (i.get("class") or []):
        continue

    for ext in i.tbody.find_all("tr"):
        ext = ext.find_all("td")

        if not ext:
            continue

        # Strip footnote markers such as [1] from the cell text
        ext = re.sub(r"\[.*?\]", "", ext[0].text).strip()

        # Comma-separated cells list several extensions; entries containing
        # a dot are skipped
        if "," in ext:
            for j in ext.split(","):

                if "." in j:
                    continue

                exts.append(j.strip())
            continue

        if "." in ext:
            continue

        exts.append(ext)
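# The Microsoft selector assumes the extension table is the page's second
# <tbody>, with extension and description in alternating <p> tags
# (hence the [::2] stride)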
res = requests.get(MICROSOFT_URL).text
microsoft_soup = BeautifulSoup(res, "html.parser")

microsoft_exts = microsoft_soup.find_all("tbody")[1].find_all("p")[::2]

for i in microsoft_exts:
    i = i.text

    if "," in i:
        for j in i.split(","):
            exts.append(j.strip().upper())
        continue

    exts.append(i.upper())
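# The GeeksforGeeks selector assumes extensions sit in table header cells,
# usually written with a leading dot, which is stripped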
res = requests.get(GFG_URL).text
gfg_soup = BeautifulSoup(res, "html.parser")

gfg_exts = gfg_soup.find_all("tbody")

for i in gfg_exts:
    i = i.find_all("th")
    for ext in i:
        ext = ext.text.strip()
        if ext.startswith("."):
            exts.append(ext[1:].upper())
        else:
            exts.append(ext.upper())
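# Deduplicate while preserving first-seen order, then write the base wordlist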
exts = list(dict.fromkeys(exts))

with open("file-ext.txt", "w") as f:
    f.write("\n".join(exts))
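# Variant list: each extension in its original case plus lower case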
mutated_exts = []

for i in exts:
    mutated_exts.append(i)
    mutated_exts.append(i.lower())

mutated_exts = list(dict.fromkeys(mutated_exts))

with open("file-ext-all-cases.txt", "w") as f:
    f.write("\n".join(mutated_exts))
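# Lower-case-only variant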
mutated_exts = []

for i in exts:
    mutated_exts.append(i.lower())

mutated_exts = list(dict.fromkeys(mutated_exts))

with open("file-ext-lower-cases.txt", "w") as f:
    f.write("\n".join(mutated_exts))
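# Upper-case-only variant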
mutated_exts = []

for i in exts:
    mutated_exts.append(i.upper())

mutated_exts = list(dict.fromkeys(mutated_exts))

with open("file-ext-upper-cases.txt", "w") as f:
    f.write("\n".join(mutated_exts))