# Code to call enscript and get a syntax-highlighted file.

from cgi import escape
from os import popen2
from re import compile
from subprocess import PIPE, Popen

# Patterns used to repair tag nesting in converted enscript output.
# Each one captures two adjacent tags so they can be swapped with r'\2\1'.
re_span1 = compile(
    r'(<span style="color: #......">)'  # group 1: opening colour span
    r'(<[bi]>)'                         # group 2: opening <b> or <i>
)
re_span2 = compile(
    r'(</[bi]>)'                        # group 1: closing </b> or </i>
    r'(</span>)'                        # group 2: closing colour span
)

# Maps a lowercased file extension (the text after the last '.' of a file
# name, or the whole name when it contains no '.', e.g. "Makefile") to the
# enscript highlighting language passed with -E.  Keys are kept sorted.
extension_table = {
    'as': 'asm',
    'asm': 'asm',
    'awk': 'awk',
    'c': 'c',
    'cc': 'cpp',
    'changelog': 'changelog',
    'cpp': 'cpp',
    'cs': 'java',
    'csh': 'csh',
    'el': 'elisp',
    'eml': 'mail',
    'eps': 'postscript',
    'htm': 'html',
    'html': 'html',
    'java': 'java',
    'javascript': 'javascript',
    'js': 'javascript',
    'ksh': 'ksh',
    'm4': 'm4',
    'makefile': 'makefile',
    'objc': 'objc',
    'pas': 'delphi',
    'patch': 'diffu',
    'pl': 'perl',
    'pm': 'perl',
    'ps': 'postscript',
    'py': 'python',
    'rb': 'ruby',
    'rbw': 'ruby',
    'scheme': 'scheme',
    'sh': 'sh',
    'sql': 'sql',
    'states': 'states',
    'tcl': 'tcl',
    'tcsh': 'tcsh',
    'tex': 'tex',
    'vba': 'vba',
    'vrml': 'vrml',
    'zsh': 'zsh',
}

def code2html(data, lang=None, filename=None):
    """
    Returns a syntax-highlighted version of given data string.

    The text is piped through the external ``enscript`` program and the
    contents of the <pre> section of its HTML output are returned, with
    enscript's <FONT COLOR=...>, <B> and <I> markup rewritten to
    <span style="color: ...">, <b> and <i>.

    data     -- source text to highlight.
    lang     -- enscript highlighting language name.  When not given, it
                is looked up in extension_table using the part of
                filename after the last '.'.
    filename -- optional file name, used only to guess lang.

    When no language can be determined, enscript is run without a
    highlighting language.  When enscript cannot be run, or its output
    contains no <pre> section, the data is returned HTML-escaped with no
    highlighting.
    """
    # Only guess from the file name when there is one; callers may omit both.
    if not lang and filename:
        ext = filename.split('.')[-1]
        lang = extension_table.get(ext.lower())

    # An argument list (no shell) keeps a caller-supplied lang from being
    # interpreted as shell syntax.
    args = ['enscript', '-qp', '-', '-Whtml', '--color']
    if lang:
        args.append('-E%s' % lang)
    args.append('-')

    try:
        proc = Popen(args, stdin=PIPE, stdout=PIPE, universal_newlines=True)
        # communicate() feeds stdin and drains stdout together, so large
        # inputs cannot block on a full pipe.
        output = proc.communicate(data)[0]
    except (IOError, OSError):
        # No highlighting. Return data with just HTML encoding.
        return escape(data)

    result = []
    found_pre = False
    for line in output.splitlines(True):
        if not found_pre:
            # Skip the HTML preamble up to and including the <pre> line.
            if '<pre>' in line.lower():
                found_pre = True
            continue
        last_line = '</pre>' in line.lower()
        if last_line:
            line = line.replace('</PRE>', '').replace('</pre>', '')
        line = line.replace('<B>', '<b>')
        line = line.replace('</B>', '</b>')
        line = line.replace('<I>', '<i>')
        line = line.replace('</I>', '</i>')
        line = line.replace('</FONT>', '</span>')
        line = line.replace('<FONT COLOR="', '<span style="color: ')
        # Fix enscript's incorrect tag closing order.
        line = re_span1.sub(r'\2\1', line)
        line = re_span2.sub(r'\2\1', line)
        result.append(line)
        if last_line:
            break

    if not found_pre:
        # enscript produced no usable HTML; do not silently drop the data.
        return escape(data)
    return ''.join(result) # Newlines are preserved from enscript.


if __name__ == '__main__':
    # Command-line test harness: writes one HTML page to stdout containing
    # a <pre> section with the highlighted contents of every file named on
    # the command line.  Prints nothing when no files are given.
    from sys import argv
    if len(argv) > 1:
        print("<html><head><title>test</title></head>")
        print("<body>")
        for filename in argv[1:]:
            print("<pre>")
            source = open(filename, 'r')
            try:
                data = source.read()
            finally:
                # Close the file even if reading fails.
                source.close()
            print(code2html(data, filename=filename))
            print("</pre>")
        print("</body></html>")
