#!/usr/bin/python
from __future__ import print_function
from codecs import open as copen
from os import listdir, path
from sys import argv

import unicodedata

# usage: utf8-fix.py PATH [codec] [normalize]
DEFAULT_ENCODING = "iso8859_2"  # iso8859_2 a.k.a latin2

# First command-line argument is the target path; an empty string is used
# when it is missing.
PATH = "" if len(argv) <= 1 else argv[1]

# Every argument after PATH is either the word "normalize" (any letter case),
# which switches normalization on, or a codec name. When several codec names
# are given, the last one wins; with none, ENCODING stays None.
_options = argv[2:]
NORMALIZE = any(opt.lower() == "normalize" for opt in _options)
_codec_args = [opt for opt in _options if opt.lower() != "normalize"]
ENCODING = _codec_args[-1] if _codec_args else None


def convert_file(file_path):
    """Rewrite the file at *file_path* in place with its encoding repaired.

    The file is read as UTF-8, run through ``fix_encoding`` using the
    module-level ``ENCODING`` and ``NORMALIZE`` settings (verbose mode on),
    and the result is written back to the same path as UTF-8.

    Args:
        file_path: Path of the file to overwrite.
    """
    # NOTE: the status line is printed before any work is done, so it does
    # not reflect whether fix_encoding() actually managed to repair the text.
    print("[*]", file_path, "fixed!")

    # errors='ignore' drops bytes that are not valid UTF-8 instead of raising.
    # The context manager guarantees the handle is closed even if read() fails.
    with copen(file_path, "r", "utf8", errors='ignore') as source:
        content = source.read()

    ccontent = fix_encoding(content, ENCODING, NORMALIZE, True)

    # Opened only after the whole content is in memory, because "w" truncates
    # the very file that was just read.
    with copen(file_path, "w", "utf8") as target:
        target.write(ccontent)

def normalize_str(text):
    """Return *text* with combining marks (accents, diacritics) removed.

    The text is first decomposed with Unicode NFKD, which splits an accented
    character into its base character followed by combining marks; every
    character in category ``Mn`` (nonspacing mark) is then dropped.
    """
    decomposed = unicodedata.normalize('NFKD', text)

    kept = []
    for char in decomposed:
        if unicodedata.category(char) == 'Mn':
            continue
        kept.append(char)

    return ''.join(kept)

def fix_encoding(content, encoding=None, norm=False, verbose=False):
    """Try to repair text that was decoded with the wrong codec.

    The text is encoded back to bytes with *encoding* and those bytes are
    decoded as UTF-8. If either step fails, the text is left unchanged.

    Args:
        content: The text to repair.
        encoding: Codec name used for the re-encoding step; falls back to
            ``DEFAULT_ENCODING`` when None or empty.
        norm: When true, the result is passed through ``normalize_str``
            before being returned.
        verbose: When true, print a message if the repair fails.

    Returns:
        The repaired text, or *content* itself if the repair failed,
        optionally normalized.
    """
    encoding = encoding or DEFAULT_ENCODING

    try:
        fixed = content.encode(encoding).decode("utf8")
    except (UnicodeError, LookupError):
        # UnicodeError: the text cannot be represented in `encoding`, or the
        # resulting bytes are not valid UTF-8. LookupError: unknown codec
        # name. Anything else (e.g. KeyboardInterrupt) is left to propagate.
        fixed = content
        if verbose:
            print("[*] error: can't fix the encoding. mixed encoding?")

    return normalize_str(fixed) if norm else fixed


if __name__ == "__main__":
    # PATH may name a single file or a directory; anything else gets the
    # usage message.
    if path.isfile(PATH):
        convert_file(PATH)

    elif not path.isdir(PATH):
        usage_message = (
            "[*] error: usage: %s FILE_OR_DIR_PATH [codec] [normalize]"
        )
        print(usage_message % argv[0])

    else:
        # Only regular files directly inside the directory are converted;
        # sub-directories are not descended into.
        candidates = (path.join(PATH, entry) for entry in listdir(PATH))
        for candidate in candidates:
            if path.isfile(candidate):
                convert_file(candidate)