"""
makesite.py - A script for generating rutherfurd.net
by converting a collection of reStructuredText files to ".ht"
files, which are then converted to HTML using ``ht2html.py``.

To make generating the site faster, MD5 digests are 
created for all found text files and saved, so that
only files changed between generation times will need
to be converted from ".txt" -> ".ht" -> ".html".

Note that one may pass "--force" to regenerate files
whether they've changed or not.

$Id$
"""

import fnmatch
import getopt
import md5
import os
import sys

# Compatibility shim: on interpreters that do not provide the
# ``True`` and ``False`` builtins the bare name lookup raises
# NameError, in which case both names are defined as plain integers.
try:
    True
except NameError:
    True,False = 1,0

# base section (heading) number H{x}; handed to "rst2ht.py"
# as ``--base-section``
BASE_SECTION = 3

# root directory of the site, relative to this script
ROOTDIR = '.'

# name of the file (inside the root directory) holding the
# saved MD5 digests
DIGEST_FILE = 'digests'

# location of the "ht2html.py" script
HT2HTML = r'c:\bin\ht2html-2.0\ht2html.py'

# location of the "rst2ht.py" script
RST2HT = 'rst2ht.py'

# python style class handed to "ht2html" (``-s`` option)
STYLECLASS = 'RutherfurdDotNet'

# stylesheet handed to "rst2ht.py" (``--stylesheet`` option)
STYLESHEET = 'default.css'


def findFiles(rootdir,pattern):
    """
    Returns a list of paths to the files beneath `rootdir`
    whose names match the glob `pattern`.

    Each path is `rootdir` joined with the location of the file
    below it.  Directories whose names start with '.' are skipped
    together with everything beneath them.  Only directories met
    while walking are tested, so `rootdir` itself may be '..' or
    live inside a hidden directory.  Directories are never
    returned, even when their own names match `pattern`.
    """
    found = []
    for directory, dirnames, filenames in os.walk(rootdir):
        # prune hidden directories in place so that ``os.walk``
        # never descends into them
        dirnames[:] = [d for d in dirnames if not d.startswith('.')]
        found.extend([os.path.join(directory,f) \
            for f in filenames \
            if fnmatch.fnmatch(f,pattern)])
    return found

def getPathToRoot(path,sub=2):
    """
    Builds the relative prefix leading from `path` back up to
    the root.

    Backslashes and forward slashes both count as separators.
    `sub` is the number of path components that are subtracted
    before one '../' is emitted per remaining component.  When
    nothing remains, './' is returned so that the result is
    still relative to the current directory.

    >>> from makesite import getPathToRoot
    >>> getPathToRoot('.\\cookbook\\index.txt')
    '../'
    >>> getPathToRoot('.\\jEdit\\plugins\\editorscheme\\index.txt')
    '../../../'
    >>>
    """
    components = path.replace('\\','/').split('/')
    levels = len(components) - sub
    # an empty prefix means "already at the root"
    return ('../' * levels) or './'

def loadDigests(rootdir):
    """
    Returns the dict of saved MD5 digests read from the digest
    file in `rootdir`.

    {filename: hexdigest[,...]}

    An empty dict is returned when the digest file cannot be
    opened or read (for instance on the very first run).  Blank
    and malformed lines are skipped; a file without an entry is
    simply treated as changed.
    """
    digests = {}
    try:
        f = open(os.path.join(rootdir,DIGEST_FILE))
        try:
            for line in f:
                line = line.strip()
                # ignore blank lines
                if not line:
                    continue
                # format is {hexdigest}\t{filepath}; split on the
                # first tab only so that a path containing a tab
                # is kept whole
                fields = line.split('\t',1)
                if len(fields) != 2:
                    # malformed entry, drop it rather than abort
                    continue
                digest,name = fields
                digests[name] = digest
        finally:
            # close the file even if reading fails part way
            f.close()
    except IOError:
        # no readable digest file: start with no digests
        pass
    return digests

def saveDigests(rootdir,digests):
    """
    Saves MD5 hex digests to the digest file in `rootdir`,
    replacing its previous contents.

    `digests` maps file paths to hex digests.  Entries are
    written sorted by file path.  File format is::

        {digest}\t{filepath}
        ...
        {digest}\t{filepath}

    Raises IOError if the digest file cannot be written.
    """
    # order the entries before the old file is truncated
    names = list(digests.keys())
    names.sort()
    f = open(os.path.join(rootdir,DIGEST_FILE),'w')
    try:
        for name in names:
            f.write('%s\t%s\n' % (digests[name],name))
    finally:
        # close the file even if a write fails
        f.close()

def getDigest(path):
    """
    Creates md5 hex digest for `path`.
    """
    m = md5.new()
    f = open(path)
    m.update(f.read())
    f.close()
    return m.hexdigest()

def isUpToDate(path,digests):
    """
    Tells whether the file at `path` still has the digest that
    is recorded for it in `digests`.

    Returns `True` when the recorded digest equals the current
    one.  Otherwise (including when no digest is recorded yet)
    the current digest is stored in `digests` under `path` and
    `False` is returned.  Either way `digests` holds the current
    digest for `path` afterwards.
    """
    current = getDigest(path)
    if digests.get(path,'') == current:
        return True
    # changed or never seen: remember the new digest
    digests[path] = current
    return False

def main(forceall,clean):
    """
    Regenerates the site found beneath ``ROOTDIR``.

    1. Every public '.txt' file is converted to '.ht' by running
       the ``RST2HT`` script.
    2. Every public '.ht' file is converted to HTML by running
       the ``HT2HTML`` script.
    3. If `clean` is true, all '.ht' files are removed.
    4. The digests are saved.

    Files whose digest has not changed since the last run are
    skipped unless `forceall` is true.  Each command line is
    printed before it is run.  When a command exits with a
    non-zero status the digest of its input file is dropped, so
    that the file is converted again on the next run.
    """

    path = ROOTDIR
    digests = loadDigests(ROOTDIR)

    # reST to .ht (files starting with ``_`` are private).
    # The class must be spelled ``[A-Za-z]``: ``[A-z]`` also spans
    # the characters between 'Z' and 'a', which include '_'.
    txt_files = findFiles(path, '[A-Za-z]*.txt')
    for txt_file in txt_files:
        # always refresh the digest, also when forcing a rebuild,
        # so that the saved digests match what was converted
        unchanged = isUpToDate(txt_file,digests)
        if unchanged and not forceall:
            continue
        rootdir = getPathToRoot(txt_file)
        ht_file = os.path.splitext(txt_file)[0] + '.ht'
        cmd = 'c:\\python234\\python.exe %s -g -t -s  --report=3 --stylesheet=%s --base-section=%d %s %s' % \
            (RST2HT, rootdir + STYLESHEET, BASE_SECTION, txt_file, ht_file,)
        print(cmd)
        if os.system(cmd) != 0 and txt_file in digests:
            # conversion failed: forget the digest so it is retried
            del digests[txt_file]

    # .ht to HTML (files starting with ``_`` are private)
    ht_files = findFiles(path, '[A-Za-z]*.ht')
    for ht_file in ht_files:
        unchanged = isUpToDate(ht_file,digests)
        if unchanged and not forceall:
            continue
        rootdir = getPathToRoot(ht_file)
        cmd = 'c:\\python234\\python.exe "%s" -r %s -s %s %s' % \
            (HT2HTML, rootdir, STYLECLASS, ht_file,)
        print(cmd)
        if os.system(cmd) != 0 and ht_file in digests:
            # conversion failed: forget the digest so it is retried
            del digests[ht_file]

    # remove ".ht" files
    if clean:
        for f in findFiles(path, '*.ht'):
            print("removing %s" % f)
            os.unlink(f)

    print('saving digests...')
    saveDigests(ROOTDIR,digests)
    print('OK')

def usage():
    """
    Prints a one-line summary of the command-line options,
    prefixed with the name of the running script.
    """
    # the leading space keeps the script name and options apart
    print(os.path.basename(sys.argv[0]) + ' -h|--help -f|--force -c|--clean')

if __name__ == '__main__':
    # by default don't force a rebuild of all pages and
    # keep the intermediate ".ht" files
    forceall = False
    clean = False
    try:
        # none of the options takes a value; positional
        # arguments are accepted but ignored
        opts,args = getopt.getopt(sys.argv[1:], "hfc", ["help","force","clean"])
    except getopt.GetoptError:
        # unknown option: show the usage line and fail
        usage()
        sys.exit(1)
    for o,a in opts:
        if o in ('-h','--help'):
            usage()
            sys.exit()
        elif o in ('-f','--force'):
            forceall = True
        elif o in ('-c','--clean'):
            clean = True

    main(forceall,clean)

# :indentSize=4:lineSeparator=\n:noTabs=true:tabSize=4:
