mirror of
https://github.com/swisskyrepo/PayloadsAllTheThings
synced 2025-12-06 08:54:40 +01:00
Add JSON format for SQL Injection payloads
- Created Python script to convert all SQL Injection payloads to JSON format - Generated sqli_payloads.json with 1449 unique SQL injection payloads - Each payload includes metadata (type, technique, dbms, severity, source) - Supports multiple database types: MySQL, MSSQL, PostgreSQL, Oracle, SQLite, DB2, Cassandra, BigQuery - Categorized by injection techniques: union-based, time-based, boolean-based, error-based, etc.
This commit is contained in:
parent
78882b4a5d
commit
933651f6cd
2 changed files with 16273 additions and 0 deletions
332
SQL Injection/convert_to_json.py
Normal file
332
SQL Injection/convert_to_json.py
Normal file
|
|
@ -0,0 +1,332 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script to convert SQL Injection payloads from markdown files to JSON format
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any
|
||||
|
||||
|
||||
# Leading markers of comments, markdown bullets/emphasis and markdown links.
_NON_PAYLOAD_PREFIXES = ('//', '#', '/*', '-', '*', '[')

# Phrases that mark a line as explanatory text rather than a payload.
_DOC_PHRASES = (
    'you can', 'this is', 'example:', 'note:', 'payload:', 'source:',
    'allows you', 'can be used', 'requirements:', 'description:',
    'using this', 'this will', 'for example', 'such as',
)

# Everyday English words; three or more distinct hits means "a sentence".
_FILLER_WORDS = (
    'the', 'is', 'are', 'was', 'were', 'been', 'being', 'have', 'has', 'had',
    'do', 'does', 'did', 'will', 'would', 'should', 'can', 'could', 'may',
    'might', 'must', 'shall', 'a', 'an', 'and', 'or', 'but', 'if', 'because',
    'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against',
    'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below',
)

# Substrings (matched case-insensitively) that hint at SQL injection content.
_SQLI_KEYWORDS = (
    'select', 'union', 'insert', 'update', 'delete', 'drop', 'create',
    'alter', 'exec', 'execute', 'declare', 'cast', 'convert', 'concat',
    'substring', 'sleep', 'benchmark', 'waitfor', 'delay', 'pg_sleep',
    'dbms_', 'utl_', 'xp_', 'sp_', 'sys.', 'information_schema',
    'having', 'group by', 'order by', 'limit', 'offset', 'where',
    '@@', 'schema', 'database', 'table', 'column', 'char(', 'chr(',
    'ascii', 'hex', 'unhex', 'load_file', 'into outfile', 'dumpfile',
    "' or ", '" or ', '-- ', '#', '/*', '*/', 'null', 'version()',
)

# Characters / tokens typical of SQL syntax (matched case-sensitively).
_SQL_SYNTAX_TOKENS = ("'", '"', '--', '/*', ';', '=')

# Regexes for common injection shapes, compiled once at import time.
_SQL_REGEXES = tuple(
    re.compile(expr, re.IGNORECASE)
    for expr in (
        r"'\s+or\s+", r'"\s+or\s+', r'1\s*=\s*1', r'1\s*=\s*2',
        r'union\s+select', r'union\s+all', r'and\s+1\s*=',
        r'admin\'', r'\'--', r'\'#', r'\) or ', r'\) and ',
    )
)


def is_valid_sqli_payload(line: str) -> bool:
    """Heuristically decide whether ``line`` is a SQL injection payload.

    The line is rejected when it is empty, starts with a comment / bullet /
    link marker, contains a documentation phrase, contains three or more
    distinct filler words, is a bare URL without ``select``/``union``, or
    looks like a markdown table row.  A line that survives those filters is
    accepted when it contains at least one SQL keyword, SQL syntax token, or
    matches one of the injection regexes.

    Args:
        line: A single, already-stripped candidate line.

    Returns:
        True if the line looks like a payload, False otherwise.
    """
    if not line:
        return False

    # Comments, markdown list items / emphasis, and markdown links.
    if line.startswith(_NON_PAYLOAD_PREFIXES):
        return False

    lowered = line.lower()

    if any(phrase in lowered for phrase in _DOC_PHRASES):
        return False

    # Pad with spaces so words at either end of the line are matched too.
    padded = f' {lowered} '
    filler_hits = sum(f' {word} ' in padded for word in _FILLER_WORDS)
    if filler_hits >= 3:
        return False

    # Plain reference links are not payloads unless they carry SQL keywords.
    if line.startswith('http') and not ('select' in lowered or 'union' in lowered):
        return False

    # Markdown table rows and separators.
    if line.startswith('|') or line.count('|') >= 3:
        return False

    # Any single positive signal is enough.
    if any(keyword in lowered for keyword in _SQLI_KEYWORDS):
        return True
    if any(token in line for token in _SQL_SYNTAX_TOKENS):
        return True
    return any(regex.search(line) for regex in _SQL_REGEXES)
|
||||
|
||||
|
||||
def extract_payloads_from_code_block(code_block: str, section_name: str, db_type: str = "generic") -> List[Dict[str, Any]]:
    """Turn the lines of one code block into payload records.

    Each line is stripped and passed through ``is_valid_sqli_payload``; lines
    that pass are classified with ``determine_type``, ``determine_technique``
    and ``determine_severity``.

    Args:
        code_block: Raw text of a code block (without the fence markers).
        section_name: Label recorded as the payload's ``source`` and used as
            context for type classification.
        db_type: DBMS label recorded in the metadata.

    Returns:
        One dict per accepted line, in input order, with the keys
        ``payload``, ``category`` and ``metadata``.
    """
    candidates = (raw.strip() for raw in code_block.strip().split('\n'))
    return [
        {
            "payload": text,
            "category": "sqli",
            "metadata": {
                "type": determine_type(text, section_name),
                "technique": determine_technique(text),
                "dbms": db_type,
                "severity": determine_severity(text),
                "source": section_name,
            },
        }
        for text in candidates
        if is_valid_sqli_payload(text)
    ]
|
||||
|
||||
|
||||
def determine_type(payload: str, section: str) -> str:
    """Classify a payload's injection type from its text and section label.

    Rules are evaluated top to bottom and the first match wins, so a section
    label mentioning ``auth``/``bypass`` outranks a ``union`` payload, and a
    section label mentioning ``blind``/``time`` yields ``'blind'`` before the
    payload itself is inspected for delay functions.

    Args:
        payload: The payload text.
        section: Section title or file stem the payload came from.

    Returns:
        One of ``'authentication_bypass'``, ``'union_based'``,
        ``'error_based'``, ``'blind'``, ``'time_based'``, ``'boolean_based'``,
        ``'stacked_queries'``, ``'polyglot'``, ``'out_of_band'`` or
        ``'generic'``.
    """
    payload_text = payload.lower()
    heading = section.lower()

    def heading_has(*needles: str) -> bool:
        return any(needle in heading for needle in needles)

    def payload_has(*needles: str) -> bool:
        return any(needle in payload_text for needle in needles)

    if heading_has('auth', 'bypass'):
        return 'authentication_bypass'
    if payload_has('union'):
        return 'union_based'
    if heading_has('error'):
        return 'error_based'
    if heading_has('blind', 'time'):
        return 'blind'
    if payload_has('sleep', 'benchmark', 'waitfor', 'pg_sleep'):
        return 'time_based'
    if heading_has('boolean'):
        return 'boolean_based'
    if heading_has('stacked'):
        return 'stacked_queries'
    if heading_has('polyglot'):
        return 'polyglot'
    if heading_has('out of band', 'oast'):
        return 'out_of_band'
    return 'generic'
|
||||
|
||||
|
||||
def determine_technique(payload: str) -> str:
    """Label the injection technique a payload uses.

    The payload is lower-cased and checked against an ordered list of keyword
    groups; the first group with any keyword present decides the label.

    Args:
        payload: The payload text.

    Returns:
        One of ``'union_select'``, ``'time_delay'``, ``'boolean_logic'``,
        ``'xml_error'``, ``'file_operation'``, ``'command_execution'``,
        ``'type_conversion'`` or ``'basic'``.
    """
    text = payload.lower()

    # UNION SELECT needs both keywords, so it is handled outside the table.
    if 'union' in text and 'select' in text:
        return 'union_select'

    ordered_rules = (
        (('sleep', 'benchmark', 'waitfor', 'pg_sleep', 'dbms_lock'), 'time_delay'),
        (("' or ", '" or ', '1=1'), 'boolean_logic'),
        (('extractvalue', 'updatexml', 'xmltype'), 'xml_error'),
        (('load_file', 'into outfile', 'into dumpfile'), 'file_operation'),
        (('exec', 'execute', 'xp_cmdshell'), 'command_execution'),
        (('cast', 'convert'), 'type_conversion'),
    )
    for needles, label in ordered_rules:
        if any(needle in text for needle in needles):
            return label
    return 'basic'
|
||||
|
||||
|
||||
def determine_severity(payload: str) -> str:
    """Rate a payload as ``'critical'``, ``'high'`` or ``'medium'``.

    The payload is lower-cased and matched against ordered keyword tiers; the
    first tier with any keyword present decides the rating.

    Args:
        payload: The payload text.

    Returns:
        ``'critical'`` for command execution / file access keywords,
        ``'high'`` for data extraction or boolean-bypass patterns, and
        ``'medium'`` for everything else (including time-delay payloads).
    """
    text = payload.lower()

    tiers = (
        # Command execution or file operations.
        (('xp_cmdshell', 'exec', 'execute', 'load_file', 'into outfile', 'into dumpfile'), 'critical'),
        # Data extraction.
        (('union select', 'information_schema', 'sys.', 'database()', 'version()'), 'high'),
        # Boolean-logic / authentication-bypass patterns are rated high as well.
        (("' or ", '" or ', '1=1'), 'high'),
        # Time-delay (blind) payloads currently share the default rating.
        (('sleep', 'benchmark', 'waitfor', 'pg_sleep'), 'medium'),
    )
    for needles, rating in tiers:
        if any(needle in text for needle in needles):
            return rating
    return 'medium'
|
||||
|
||||
|
||||
def parse_markdown_file(file_path: Path) -> List[Dict[str, Any]]:
    """Parse a markdown file and extract SQL injection payloads from its code fences.

    The file is scanned line by line.  Every fenced code block whose language
    tag is empty or one of ``sql``, ``bash``, ``ps1``, ``sh`` is handed to
    ``extract_payloads_from_code_block`` together with the most recent
    markdown header seen *above* that block (or the file stem when no header
    precedes it).  Fences with any other language tag are skipped as a whole,
    and ``#`` lines inside a fence are never mistaken for headers.

    Args:
        file_path: Path to a UTF-8 encoded markdown file.

    Returns:
        Payload records in document order.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Determine database type from filename; the first matching marker wins.
    name_lower = file_path.name.lower()
    db_type = "generic"
    for marker in ('mysql', 'mssql', 'postgresql', 'oracle',
                   'sqlite', 'db2', 'cassandra', 'bigquery'):
        if marker in name_lower:
            db_type = marker
            break

    accepted_languages = {'', 'sql', 'bash', 'ps1', 'sh'}
    header_pattern = re.compile(r'^#+\s+(.+)$')

    all_payloads = []
    current_section = file_path.stem
    in_fence = False
    fence_wanted = False
    fence_lines = []

    for raw_line in content.splitlines():
        stripped = raw_line.strip()

        if in_fence:
            # A closing fence is a line consisting solely of backticks.
            if stripped.startswith('```') and not stripped.strip('`'):
                if fence_wanted:
                    all_payloads.extend(
                        extract_payloads_from_code_block(
                            '\n'.join(fence_lines), current_section, db_type
                        )
                    )
                in_fence = False
            else:
                fence_lines.append(raw_line)
            continue

        if stripped.startswith('```'):
            info = stripped.lstrip('`').strip()
            # An info string cannot contain backticks; if it does, this is
            # inline code written on one line, not the start of a block.
            if '`' not in info:
                language = info.split()[0].lower() if info else ''
                in_fence = True
                fence_wanted = language in accepted_languages
                fence_lines = []
                continue

        # Outside fences only: remember the latest header as section context.
        header = header_pattern.match(raw_line)
        if header:
            current_section = header.group(1)

    return all_payloads
|
||||
|
||||
|
||||
def parse_text_file(file_path: Path) -> List[Dict[str, Any]]:
    """Parse a text file containing one raw SQL injection payload per line.

    Each line is stripped and filtered through ``is_valid_sqli_payload``;
    accepted lines are classified using the file stem as section context.
    The DBMS label is derived from the file name using the same set of
    engines the markdown parser recognises.

    Args:
        file_path: Path to the payload list.  Bytes that are not valid UTF-8
            are dropped while reading (``errors='ignore'``).

    Returns:
        Payload records in file order.
    """
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
        lines = f.readlines()

    # Determine database type from filename; the first matching marker wins.
    # 'postgres' also covers names that spell out 'postgresql'.
    db_markers = (
        ('mysql', 'mysql'),
        ('mssql', 'mssql'),
        ('postgres', 'postgresql'),
        ('oracle', 'oracle'),
        ('sqlite', 'sqlite'),
        ('db2', 'db2'),
        ('cassandra', 'cassandra'),
        ('bigquery', 'bigquery'),
    )
    filename_lower = file_path.name.lower()
    db_type = next(
        (label for marker, label in db_markers if marker in filename_lower),
        "generic",
    )

    source = file_path.stem
    all_payloads = []

    for line in lines:
        line = line.strip()

        # Validate if this is a real SQL injection payload
        if not is_valid_sqli_payload(line):
            continue

        all_payloads.append({
            "payload": line,
            "category": "sqli",
            "metadata": {
                "type": determine_type(line, source),
                "technique": determine_technique(line),
                "dbms": db_type,
                "severity": determine_severity(line),
                "source": source,
            },
        })

    return all_payloads
|
||||
|
||||
|
||||
def main():
    """Convert all SQL injection payload sources next to this script to JSON.

    Reads the known markdown files and every file in the ``Intruder`` folder,
    removes duplicate payload strings (the first occurrence wins) and writes
    the result to ``sqli_payloads.json`` beside this script.  Markdown files
    that do not exist are skipped.
    """
    base_dir = Path(__file__).parent
    output_file = base_dir / 'sqli_payloads.json'

    all_payloads = []

    # Process markdown files
    md_files = [
        'README.md',
        'MySQL Injection.md',
        'MSSQL Injection.md',
        'PostgreSQL Injection.md',
        'OracleSQL Injection.md',
        'SQLite Injection.md',
        'DB2 Injection.md',
        'Cassandra Injection.md',
        'BigQuery Injection.md',
    ]

    for md_file in md_files:
        file_path = base_dir / md_file
        if file_path.exists():
            print(f"Processing {md_file}...")
            payloads = parse_markdown_file(file_path)
            all_payloads.extend(payloads)
            print(f" Found {len(payloads)} payloads")

    # Process Intruder folder
    intruder_dir = base_dir / 'Intruder'
    if intruder_dir.is_dir():
        # iterdir() yields entries in arbitrary order; sorting keeps both the
        # output order and the "first occurrence wins" de-duplication below
        # reproducible across machines and runs.
        for txt_file in sorted(intruder_dir.iterdir()):
            if txt_file.is_file():
                print(f"Processing {txt_file.name}...")
                payloads = parse_text_file(txt_file)
                all_payloads.extend(payloads)
                print(f" Found {len(payloads)} payloads")

    # Remove duplicates while preserving order
    seen = set()
    unique_payloads = []
    for payload in all_payloads:
        payload_str = payload['payload']
        if payload_str not in seen:
            seen.add(payload_str)
            unique_payloads.append(payload)

    # Write to JSON file
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(unique_payloads, f, indent=2, ensure_ascii=False)

    print(f"\nTotal payloads: {len(all_payloads)}")
    print(f"Unique payloads: {len(unique_payloads)}")
    print(f"Output saved to: {output_file}")
|
||||
|
||||
|
||||
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
15941
SQL Injection/sqli_payloads.json
Normal file
15941
SQL Injection/sqli_payloads.json
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue