Mirror of https://github.com/swisskyrepo/PayloadsAllTheThings, synced 2025-12-06 17:02:53 +01:00

Merge 933651f6cd into ca50df2336
Commit ebaa94e879
4 changed files with 35203 additions and 0 deletions
332  SQL Injection/convert_to_json.py  Normal file
@@ -0,0 +1,332 @@
#!/usr/bin/env python3
"""
Script to convert SQL Injection payloads from markdown files to JSON format
"""

import json
import re
from pathlib import Path
from typing import List, Dict, Any


def is_valid_sqli_payload(line: str) -> bool:
    """Check if a line is a valid SQL injection payload"""
    # Skip empty lines, comments, and documentation
    if not line or line.startswith('//') or line.startswith('#') or line.startswith('/*'):
        return False

    # Skip lines that are clearly documentation
    if line.startswith('-') or line.startswith('*') or line.startswith('['):
        return False

    # Skip lines that contain common documentation words
    doc_words = ['you can', 'this is', 'example:', 'note:', 'payload:', 'source:',
                 'allows you', 'can be used', 'requirements:', 'description:',
                 'using this', 'this will', 'for example', 'such as']
    if any(word in line.lower() for word in doc_words):
        return False

    # Skip lines that are just sentences/descriptions
    common_words = ['the', 'is', 'are', 'was', 'were', 'been', 'being', 'have', 'has', 'had',
                    'do', 'does', 'did', 'will', 'would', 'should', 'can', 'could', 'may',
                    'might', 'must', 'shall', 'a', 'an', 'and', 'or', 'but', 'if', 'because',
                    'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against',
                    'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below']

    word_count = sum(1 for word in common_words if ' ' + word + ' ' in ' ' + line.lower() + ' ')
    if word_count >= 3:  # If it contains 3 or more common words, likely a sentence
        return False

    # Skip http/https links that are not part of payloads
    if line.startswith('http') and 'select' not in line.lower() and 'union' not in line.lower():
        return False

    # Skip table headers and separators
    if line.startswith('|') or line.count('|') >= 3:
        return False

    # Must contain at least one SQL injection indicator
    sqli_indicators = [
        'select', 'union', 'insert', 'update', 'delete', 'drop', 'create',
        'alter', 'exec', 'execute', 'declare', 'cast', 'convert', 'concat',
        'substring', 'sleep', 'benchmark', 'waitfor', 'delay', 'pg_sleep',
        'dbms_', 'utl_', 'xp_', 'sp_', 'sys.', 'information_schema',
        'having', 'group by', 'order by', 'limit', 'offset', 'where',
        '@@', 'schema', 'database', 'table', 'column', 'char(', 'chr(',
        'ascii', 'hex', 'unhex', 'load_file', 'into outfile', 'dumpfile',
        "' or ", '" or ', '-- ', '#', '/*', '*/', 'null', 'version()'
    ]

    has_indicator = any(indicator in line.lower() for indicator in sqli_indicators)

    # SQL payloads often contain SQL syntax characters or comment sequences
    has_sql_chars = any(token in line for token in ["'", '"', '--', '/*', ';', '='])

    # Additional checks for common SQL patterns
    sql_patterns = [
        r"'\s+or\s+", r'"\s+or\s+', r'1\s*=\s*1', r'1\s*=\s*2',
        r'union\s+select', r'union\s+all', r'and\s+1\s*=',
        r'admin\'', r'\'--', r'\'#', r'\) or ', r'\) and '
    ]

    has_sql_pattern = any(re.search(pattern, line, re.IGNORECASE) for pattern in sql_patterns)

    return has_indicator or has_sql_chars or has_sql_pattern

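# Illustrative spot-checks of the heuristic above (hypothetical inputs, traced
# by hand against the checks in is_valid_sqli_payload):
#   is_valid_sqli_payload("' OR 1=1 --")            -> True   (quote and boolean pattern)
#   is_valid_sqli_payload("This is a description")  -> False  (hits the doc_words filter)
#   is_valid_sqli_payload("| id | name |")          -> False  (markdown table row)
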
def extract_payloads_from_code_block(code_block: str, section_name: str, db_type: str = "generic") -> List[Dict[str, Any]]:
    """Extract individual SQL injection payloads from a code block"""
    payloads = []
    lines = code_block.strip().split('\n')

    for line in lines:
        line = line.strip()

        # Validate if this is a real SQL injection payload
        if not is_valid_sqli_payload(line):
            continue

        # Determine type and context based on the payload
        payload_type = determine_type(line, section_name)
        technique = determine_technique(line)
        severity = determine_severity(line)

        payload_obj = {
            "payload": line,
            "category": "sqli",
            "metadata": {
                "type": payload_type,
                "technique": technique,
                "dbms": db_type,
                "severity": severity,
                "source": section_name
            }
        }
        payloads.append(payload_obj)

    return payloads

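# Example use of the extractor on one hypothetical fenced block:
#   block = "' OR 1=1 --\nSELECT * FROM users;"
#   extract_payloads_from_code_block(block, "Authentication bypass", "mysql")
#   -> two payload dicts, both with type='authentication_bypass' and dbms='mysql'
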
def determine_type(payload: str, section: str) -> str:
    """Determine the type of SQL injection payload"""
    payload_lower = payload.lower()
    section_lower = section.lower()

    if 'auth' in section_lower or 'bypass' in section_lower:
        return 'authentication_bypass'
    elif 'union' in payload_lower:
        return 'union_based'
    elif 'error' in section_lower:
        return 'error_based'
    elif 'blind' in section_lower or 'time' in section_lower:
        return 'blind'
    elif 'sleep' in payload_lower or 'benchmark' in payload_lower or 'waitfor' in payload_lower or 'pg_sleep' in payload_lower:
        return 'time_based'
    elif 'boolean' in section_lower:
        return 'boolean_based'
    elif 'stacked' in section_lower:
        return 'stacked_queries'
    elif 'polyglot' in section_lower:
        return 'polyglot'
    elif 'out of band' in section_lower or 'oast' in section_lower:
        return 'out_of_band'
    else:
        return 'generic'


def determine_technique(payload: str) -> str:
    """Determine the SQL injection technique"""
    payload_lower = payload.lower()

    if 'union' in payload_lower and 'select' in payload_lower:
        return 'union_select'
    elif any(keyword in payload_lower for keyword in ['sleep', 'benchmark', 'waitfor', 'pg_sleep', 'dbms_lock']):
        return 'time_delay'
    elif "' or " in payload_lower or '" or ' in payload_lower or '1=1' in payload_lower:
        return 'boolean_logic'
    elif 'extractvalue' in payload_lower or 'updatexml' in payload_lower or 'xmltype' in payload_lower:
        return 'xml_error'
    elif 'load_file' in payload_lower or 'into outfile' in payload_lower or 'into dumpfile' in payload_lower:
        return 'file_operation'
    elif 'exec' in payload_lower or 'execute' in payload_lower or 'xp_cmdshell' in payload_lower:
        return 'command_execution'
    elif 'cast' in payload_lower or 'convert' in payload_lower:
        return 'type_conversion'
    else:
        return 'basic'


def determine_severity(payload: str) -> str:
    """Determine the severity of the SQL injection payload"""
    payload_lower = payload.lower()

    # Critical if it involves command execution or file operations
    if any(keyword in payload_lower for keyword in ['xp_cmdshell', 'exec', 'execute', 'load_file', 'into outfile', 'into dumpfile']):
        return 'critical'
    # High for data extraction
    elif any(keyword in payload_lower for keyword in ['union select', 'information_schema', 'sys.', 'database()', 'version()']):
        return 'high'
    # High for authentication bypass
    elif "' or " in payload_lower or '" or ' in payload_lower or '1=1' in payload_lower:
        return 'high'
    # Medium for blind/time-based injection
    elif any(keyword in payload_lower for keyword in ['sleep', 'benchmark', 'waitfor', 'pg_sleep']):
        return 'medium'
    else:
        return 'medium'

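# Worked example of the three classifiers on one hypothetical payload:
#   line = "' UNION SELECT username, password FROM users--"
#   determine_type(line, "MySQL Injection")  -> 'union_based'   ('union' in payload)
#   determine_technique(line)                -> 'union_select'
#   determine_severity(line)                 -> 'high'          ('union select' matches)
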
def parse_markdown_file(file_path: Path) -> List[Dict[str, Any]]:
    """Parse a markdown file and extract all SQL injection payloads"""
    all_payloads = []

    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Determine database type from filename
    db_type = "generic"
    if 'mysql' in file_path.name.lower():
        db_type = "mysql"
    elif 'mssql' in file_path.name.lower():
        db_type = "mssql"
    elif 'postgresql' in file_path.name.lower():
        db_type = "postgresql"
    elif 'oracle' in file_path.name.lower():
        db_type = "oracle"
    elif 'sqlite' in file_path.name.lower():
        db_type = "sqlite"
    elif 'db2' in file_path.name.lower():
        db_type = "db2"
    elif 'cassandra' in file_path.name.lower():
        db_type = "cassandra"
    elif 'bigquery' in file_path.name.lower():
        db_type = "bigquery"

    # Extract code blocks
    code_block_pattern = r'```(?:sql|bash|ps1|sh)?\n(.*?)```'
    matches = re.findall(code_block_pattern, content, re.DOTALL)

    # Find section headers for context; fall back to the file stem
    current_section = file_path.stem
    section_pattern = r'^#+\s+(.+)$'
    sections = re.findall(section_pattern, content, re.MULTILINE)

    # Process code blocks, pairing the i-th block with the i-th header when one exists
    for i, code_block in enumerate(matches):
        section_name = sections[i] if i < len(sections) else current_section

        payloads = extract_payloads_from_code_block(code_block, section_name, db_type)
        all_payloads.extend(payloads)

    return all_payloads

def parse_text_file(file_path: Path) -> List[Dict[str, Any]]:
    """Parse a text file containing raw SQL injection payloads"""
    all_payloads = []

    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
        lines = f.readlines()

    # Determine database type from filename
    db_type = "generic"
    filename_lower = file_path.name.lower()
    if 'mysql' in filename_lower:
        db_type = "mysql"
    elif 'mssql' in filename_lower:
        db_type = "mssql"
    elif 'postgres' in filename_lower:
        db_type = "postgresql"
    elif 'oracle' in filename_lower:
        db_type = "oracle"

    for line in lines:
        line = line.strip()

        # Validate if this is a real SQL injection payload
        if not is_valid_sqli_payload(line):
            continue

        payload_type = determine_type(line, file_path.stem)
        technique = determine_technique(line)
        severity = determine_severity(line)

        payload_obj = {
            "payload": line,
            "category": "sqli",
            "metadata": {
                "type": payload_type,
                "technique": technique,
                "dbms": db_type,
                "severity": severity,
                "source": file_path.stem
            }
        }
        all_payloads.append(payload_obj)

    return all_payloads

def main():
    """Main function to convert all SQL injection payloads to JSON"""
    base_dir = Path(__file__).parent
    output_file = base_dir / 'sqli_payloads.json'

    all_payloads = []

    # Process markdown files
    md_files = [
        'README.md',
        'MySQL Injection.md',
        'MSSQL Injection.md',
        'PostgreSQL Injection.md',
        'OracleSQL Injection.md',
        'SQLite Injection.md',
        'DB2 Injection.md',
        'Cassandra Injection.md',
        'BigQuery Injection.md'
    ]

    for md_file in md_files:
        file_path = base_dir / md_file
        if file_path.exists():
            print(f"Processing {md_file}...")
            payloads = parse_markdown_file(file_path)
            all_payloads.extend(payloads)
            print(f"  Found {len(payloads)} payloads")

    # Process Intruder folder
    intruder_dir = base_dir / 'Intruder'
    if intruder_dir.exists():
        for txt_file in intruder_dir.iterdir():
            if txt_file.is_file():
                print(f"Processing {txt_file.name}...")
                payloads = parse_text_file(txt_file)
                all_payloads.extend(payloads)
                print(f"  Found {len(payloads)} payloads")

    # Remove duplicates while preserving order
    seen = set()
    unique_payloads = []
    for payload in all_payloads:
        payload_str = payload['payload']
        if payload_str not in seen:
            seen.add(payload_str)
            unique_payloads.append(payload)

    # Write to JSON file
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(unique_payloads, f, indent=2, ensure_ascii=False)

    print(f"\nTotal payloads: {len(all_payloads)}")
    print(f"Unique payloads: {len(unique_payloads)}")
    print(f"Output saved to: {output_file}")


if __name__ == '__main__':
    main()
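# Shape of one record written to sqli_payloads.json (illustrative values only,
# derived by tracing a sample payload through the functions above):
# {
#   "payload": "' OR 1=1 --",
#   "category": "sqli",
#   "metadata": {
#     "type": "generic",
#     "technique": "boolean_logic",
#     "dbms": "mysql",
#     "severity": "high",
#     "source": "MySQL Injection"
#   }
# }
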
15941  SQL Injection/sqli_payloads.json  Normal file
File diff suppressed because it is too large.
278  XSS Injection/convert_to_json.py  Normal file
@@ -0,0 +1,278 @@
#!/usr/bin/env python3
"""
Script to convert XSS payloads from markdown files to JSON format
"""

import json
import re
from pathlib import Path
from typing import List, Dict, Any


def is_valid_xss_payload(line: str) -> bool:
    """Check if a line is a valid XSS payload"""
    # Skip empty lines, comments, and documentation
    if not line or line.startswith('//') or line.startswith('#') or line.startswith('/*'):
        return False

    # Skip lines that are clearly documentation
    if line.startswith('-') or line.startswith('*') or line.startswith('['):
        return False

    # Skip lines that contain common documentation words
    doc_words = ['you can', 'this is', 'codename:', 'example:', 'note:', 'payload replacing',
                 'simple script', 'for this reason', 'better to use', 'allows you',
                 'one-line http', 'can be used', 'requirements:', 'payload:', 'source:']
    if any(word in line.lower() for word in doc_words):
        return False

    # Skip lines that are just sentences/descriptions (contain spaces and common words)
    common_words = ['the', 'is', 'are', 'was', 'were', 'been', 'being', 'have', 'has', 'had',
                    'do', 'does', 'did', 'will', 'would', 'should', 'can', 'could', 'may',
                    'might', 'must', 'shall', 'a', 'an', 'and', 'or', 'but', 'if', 'because',
                    'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against',
                    'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below']

    word_count = sum(1 for word in common_words if ' ' + word + ' ' in ' ' + line.lower() + ' ')
    if word_count >= 3:  # If it contains 3 or more common words, likely a sentence
        return False

    # Skip http/https links that are not part of payloads
    if line.startswith('http') and '<' not in line and 'javascript:' not in line.lower():
        return False

    # Skip lines that look like version numbers or identifiers
    if line.count('.') >= 2 and line.count('<') == 0:
        return False

    # Must contain at least one XSS indicator
    xss_indicators = [
        '<', '>', 'javascript:', 'onerror', 'onload', 'onclick',
        'alert', 'prompt', 'confirm', 'eval', 'script', 'svg',
        'img', 'iframe', 'body', 'div', 'data:', 'vbscript:'
    ]

    has_indicator = any(indicator in line.lower() for indicator in xss_indicators)

    # Additional check: if the line has < or >, it's more likely to be a payload
    has_html_chars = '<' in line or '>' in line

    # If it has HTML characters, be more lenient
    if has_html_chars:
        return True

    # Otherwise, be more strict
    return has_indicator and '(' in line  # Likely contains a function call

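# Illustrative spot-checks of the heuristic above (hypothetical inputs, traced
# by hand against the checks in is_valid_xss_payload):
#   is_valid_xss_payload('<script>alert(1)</script>')  -> True   (HTML angle brackets)
#   is_valid_xss_payload('javascript:alert(1)')        -> True   (indicator + function call)
#   is_valid_xss_payload('1.2.3')                      -> False  (looks like a version number)
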
def extract_payloads_from_code_block(code_block: str, section_name: str) -> List[Dict[str, Any]]:
    """Extract individual payloads from a code block"""
    payloads = []
    lines = code_block.strip().split('\n')

    for line in lines:
        line = line.strip()

        # Validate if this is a real XSS payload
        if not is_valid_xss_payload(line):
            continue

        # Determine type and context based on the payload
        payload_type = determine_type(line, section_name)
        context = determine_context(line)
        severity = determine_severity(line)

        payload_obj = {
            "payload": line,
            "category": "xss",
            "metadata": {
                "type": payload_type,
                "context": context,
                "severity": severity,
                "source": section_name
            }
        }
        payloads.append(payload_obj)

    return payloads

def determine_type(payload: str, section: str) -> str:
    """Determine the type of XSS payload"""
    payload_lower = payload.lower()

    if 'polyglot' in section.lower():
        return 'polyglot'
    elif 'bypass' in section.lower():
        return 'bypass'
    elif '<script' in payload_lower:
        return 'script_tag'
    elif '<img' in payload_lower:
        return 'img_tag'
    elif '<svg' in payload_lower:
        return 'svg_tag'
    elif '<iframe' in payload_lower:
        return 'iframe'
    elif 'onerror' in payload_lower or 'onload' in payload_lower or 'onclick' in payload_lower:
        return 'event_handler'
    elif 'javascript:' in payload_lower:
        return 'javascript_uri'
    elif 'data:' in payload_lower:
        return 'data_uri'
    elif '<body' in payload_lower or '<div' in payload_lower:
        return 'html_element'
    else:
        return 'generic'


def determine_context(payload: str) -> str:
    """Determine the context where the payload works"""
    payload_lower = payload.lower()

    if 'href=' in payload_lower or 'src=' in payload_lower:
        return 'attribute'
    elif 'javascript:' in payload_lower:
        return 'href'
    elif '<script' in payload_lower:
        return 'script_tag'
    elif 'style' in payload_lower:
        return 'style'
    elif 'on' in payload_lower and '=' in payload_lower:
        return 'event_attribute'
    else:
        return 'html'


def determine_severity(payload: str) -> str:
    """Determine the severity of the payload"""
    payload_lower = payload.lower()

    # Critical if it can steal cookies or sensitive data
    if 'document.cookie' in payload_lower or 'fetch' in payload_lower:
        return 'critical'
    # High for most XSS payloads
    elif 'alert' in payload_lower or 'prompt' in payload_lower or 'confirm' in payload_lower:
        return 'high'
    # Medium for potential XSS
    else:
        return 'medium'

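# Worked example of the three classifiers on one hypothetical payload:
#   line = '<img src=x onerror=alert(document.cookie)>'
#   determine_type(line, 'XSS Injection')  -> 'img_tag'
#   determine_context(line)                -> 'attribute'  ('src=' is present)
#   determine_severity(line)               -> 'critical'   ('document.cookie' is present)
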
def parse_markdown_file(file_path: Path) -> List[Dict[str, Any]]:
    """Parse a markdown file and extract all payloads"""
    all_payloads = []

    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Extract code blocks
    code_block_pattern = r'```(?:javascript|html|js|xml|svg|csharp|ps1)?\n(.*?)```'
    matches = re.findall(code_block_pattern, content, re.DOTALL)

    # Find section headers for context; fall back to the file stem
    current_section = file_path.stem
    section_pattern = r'^#+\s+(.+)$'
    sections = re.findall(section_pattern, content, re.MULTILINE)

    # Process code blocks, pairing the i-th block with the i-th header when one exists
    for i, code_block in enumerate(matches):
        section_name = sections[i] if i < len(sections) else current_section

        payloads = extract_payloads_from_code_block(code_block, section_name)
        all_payloads.extend(payloads)

    return all_payloads

def parse_text_file(file_path: Path) -> List[Dict[str, Any]]:
    """Parse a text file containing raw payloads"""
    all_payloads = []

    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
        lines = f.readlines()

    for line in lines:
        line = line.strip()

        # Validate if this is a real XSS payload
        if not is_valid_xss_payload(line):
            continue

        payload_type = determine_type(line, file_path.stem)
        context = determine_context(line)
        severity = determine_severity(line)

        payload_obj = {
            "payload": line,
            "category": "xss",
            "metadata": {
                "type": payload_type,
                "context": context,
                "severity": severity,
                "source": file_path.stem
            }
        }
        all_payloads.append(payload_obj)

    return all_payloads

def main():
    """Main function to convert all XSS payloads to JSON"""
    base_dir = Path(__file__).parent
    output_file = base_dir / 'xss_payloads.json'

    all_payloads = []

    # Process markdown files
    md_files = [
        'README.md',
        '1 - XSS Filter Bypass.md',
        '2 - XSS Polyglot.md',
        '3 - XSS Common WAF Bypass.md',
        '4 - CSP Bypass.md',
        '5 - XSS in Angular.md'
    ]

    for md_file in md_files:
        file_path = base_dir / md_file
        if file_path.exists():
            print(f"Processing {md_file}...")
            payloads = parse_markdown_file(file_path)
            all_payloads.extend(payloads)
            print(f"  Found {len(payloads)} payloads")

    # Process Intruders folder
    intruders_dir = base_dir / 'Intruders'
    if intruders_dir.exists():
        for txt_file in intruders_dir.glob('*.txt'):
            print(f"Processing {txt_file.name}...")
            payloads = parse_text_file(txt_file)
            all_payloads.extend(payloads)
            print(f"  Found {len(payloads)} payloads")

    # Remove duplicates while preserving order
    seen = set()
    unique_payloads = []
    for payload in all_payloads:
        payload_str = payload['payload']
        if payload_str not in seen:
            seen.add(payload_str)
            unique_payloads.append(payload)

    # Write to JSON file
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(unique_payloads, f, indent=2, ensure_ascii=False)

    print(f"\nTotal payloads: {len(all_payloads)}")
    print(f"Unique payloads: {len(unique_payloads)}")
    print(f"Output saved to: {output_file}")


if __name__ == '__main__':
    main()
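# Shape of one record written to xss_payloads.json (illustrative values only,
# derived by tracing a sample payload through the functions above):
# {
#   "payload": "<svg onload=alert(1)>",
#   "category": "xss",
#   "metadata": {
#     "type": "svg_tag",
#     "context": "event_attribute",
#     "severity": "high",
#     "source": "XSS Injection"
#   }
# }
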
18652  XSS Injection/xss_payloads.json  Normal file
File diff suppressed because it is too large.