#! /usr/bin/env python

#---------------------------------------------------------------------
# Darcs Reannotater
#
# The annotation output produced by Darcs is difficult to read.  This
# program converts the Darcs annotation output to something similar to
# CVS's annotation output.
#
# This isn't intended to be a robust tool.  It doesn't do much error
# checking.  Its original purpose was to make it easy to try out
# different ideas for a better "darcs annotate" format.
# 
# Send feedback and patches to <kannan@cakoose.com>.
#
#---------------------------------------------------------------------
# Usage
#
# This program processes the output of "darcs annotate".
#
# Using Darcs output directly:
#    darcs annotate File.txt | ./darcs-reannotate
#
# Reading from files:
#    darcs annotate File1.txt > File1.ann
#    darcs annotate File2.txt > File2.ann
#    ./darcs-reannotate File1.ann File2.ann
#
# Output always goes to stdout.
#
#---------------------------------------------------------------------
# What It Does
#
# The program reads in all the lines and maintains a set of all the
# involved patches.  It then creates a unique "nickname" for each
# patch.  Ideally, the nickname is short and meaningful.  This is done
# in the "create_patch_nicknames(...)" function.
#
# Then, all the patches are printed out along with their associated
# nicknames.  This is followed by contents of the revision-controlled
# file, with each line prefixed with the patch nickname.
#
#---------------------------------------------------------------------
# Known Issues
#
# This program crashes on certain valid inputs.  Darcs' annotation
# output uses a special format when describing changes from the latest
# patchset in the repo.  This program doesn't understand that format
# yet.  This should only happen when annotating files that are touched
# by the latest patchset.  To work around this issue, you can record a
# new temporary patchset that doesn't affect any of the files you want
# to annotate.  I think the proper way to fix this is to change Darcs
# to (optionally) eliminate special treatment of the latest patch.
#
# The date parsing library routine doesn't seem to recognize many time
# zones (at least, when run on my machine).  There's a hack in there
# to recognize some of the time zones used in Darcs' own repo (look at
# the "INPUT_TIME_FORMATS" array).
#
#---------------------------------------------------------------------

import sys

def print_usage_info(prog_name, out):
   """Write the command-line help text to the stream `out`.

   prog_name is substituted into both example invocations, so the help
   shows whatever name the caller passes in.
   """
   usage_template = """
This program processes the output of "darcs annotate".

Using darcs output directly:
   darcs annotate File.txt | %s

Reading from files:
   darcs annotate File1.txt > File1.ann
   darcs annotate File2.txt > File2.ann
   %s File1.ann File2.ann

Output always goes to stdout.

"""
   usage_text = usage_template % (prog_name, prog_name)
   out.write(usage_text)

def main(prog_name='darcs-reannotate', args=None):
   """Command-line entry point.

   prog_name: name shown in the usage text.
   args:      list of command-line arguments (without the program name).
              None or an empty list means "read from stdin".

   With no arguments the annotation is read from stdin.  Otherwise every
   argument is treated as a file name and converted in turn; the first
   '-' or '--' argument is skipped and ends option processing, so later
   arguments are always taken as file names.  Output always goes to
   stdout.  A file that cannot be read is reported on stderr and the
   remaining files are still processed.
   """
   if args is None:
      args = []

   # Quick check for '-h' or '--help'.  Anything after a '-' or '--'
   # separator is a file name, so stop looking once one is seen.
   for arg in args:
      if arg in ('-h', '--help'):
         print_usage_info(prog_name, sys.stdout)
         return
      if arg in ('-', '--'):
         break

   if not args:
      # Read from stdin.  Ctrl-C simply ends the program quietly.
      try:
         convert(sys.stdin, sys.stdout)
      except KeyboardInterrupt:
         pass
      return

   # Read command line args
   look_for_dashes = True
   for file_name in args:
      if look_for_dashes and file_name in ('-', '--'):
         # Only the first separator is special; skip it.
         look_for_dashes = False
         continue
      try:
         # 'with' guarantees the file is closed even if convert() fails.
         with open(file_name) as in_file:
            convert(in_file, sys.stdout)
      except IOError as err:
         sys.stderr.write("Couldn't open %s for reading: %s\n" % (file_name, err.strerror))

# Struct with 4 fields
#   author :: String
#   date   :: String
#   short  :: String
#   long   :: [String]
class PatchInfo:
   """Identity and description of one Darcs patch.

   Fields:
     author :: String
     date   :: String
     short  :: String    (one-line patch name)
     long   :: [String]  (long description lines; empty if there is none)

   Instances are hashable and compare equal when author, date and short
   all match, so they can be used as dictionary keys.  'long' is not
   part of the identity.
   """
   def __init__(self, author, date, short, long=None):
      # Create a fresh list per instance.  A shared "long=[]" default
      # would make every patch without a long description alias the
      # same list object.
      if long is None:
         long = []
      assert isinstance(author, str)
      assert isinstance(short, str)
      assert isinstance(long, list)
      self.author = author
      self.date = date
      self.short = short
      self.long = long
      # Hash the identity fields as a tuple.  XOR-ing the individual
      # hashes cancels out whenever two of the fields are equal.
      self.saved_hash = hash((author, date, short))
         # Not sure if Darcs considers 'long' part of the patch identity.
   def __hash__(self):
      return self.saved_hash
   def __eq__(self, other):
      # Compare classes through __class__: for old-style Python 2
      # instances type() is the same for every class, so it cannot be
      # used to reject unrelated objects.
      if self.__class__ is not other.__class__: return False
      return \
         (self.author == other.author) and \
         (self.date == other.date) and \
         (self.short == other.short)
   def __ne__(self, other): return not self.__eq__(other)

# Formats tried, in order, when parsing a patch date (see pretty_date).
INPUT_TIME_FORMATS = [
   "%Y%m%d%H%M%S",
   "%a %b %d %H:%M:%S %Z %Y",

   # For some reason, many time zones aren't recognized when this
   # program is run on my machine.  This is a hack to get dates with
   # those time zones accepted: the zone name is matched as literal
   # text, so no time zone conversion takes place.
   "%a %b %d %H:%M:%S EST %Y",
   "%a %b %d %H:%M:%S EDT %Y",
   "%a %b %d %H:%M:%S CEST %Y",
]

# date_string :: String
# :: String
def pretty_date(date_string):
   """Reformat a Darcs date string using the "%x %X" time format.

   Each entry of INPUT_TIME_FORMATS is tried in order; the first one
   that parses the string (and formats without a ValueError) wins.  If
   none of them works, the input string is returned unchanged.
   """
   from time import strftime, strptime

   for candidate_format in INPUT_TIME_FORMATS:
      try:
         return strftime("%x %X", strptime(date_string, candidate_format))
      except ValueError:
         # This format doesn't fit; move on to the next one.
         continue

   # Nothing matched, so hand the raw string back to the caller.
   return date_string

# fin: input stream with Darcs' raw annotation output
# fout: output stream for friendly annotation output
def convert(fin, fout):
   """Convert Darcs' raw annotation output into a CVS-like listing.

   fin:  input stream with Darcs' raw annotation output
   fout: output stream for the friendly annotation output

   The output has two sections.  "-- Patches --" lists every patch
   that contributed a line, with its nickname, author, date, short name
   and long description.  "-- File Contents --" repeats the file, each
   line prefixed with the nickname of its patch; consecutive lines from
   the same patch get a '^' marker instead of the repeated nickname.

   If the leading file-creation patch entry is missing, an error is
   written to stderr and nothing is written to fout.
   """
   # Kept as a dict (logically a set) because that is the shape
   # create_patch_nicknames is handed.
   patches = {}    # :: {PatchInfo: Boolean}
   lines = []      # :: [(Line,PatchInfo)]

   # Read and ignore file-creation patch
   file_creation_patch = read_patch_info(fin)
   if file_creation_patch is None:
      sys.stderr.write("Error: expected file creation patch info.\n")
      return

   # Read in all the lines: each one is a patch entry followed by the
   # line of file content it annotates.
   while True:
      patch_info = read_patch_info(fin)
      if patch_info is None:
         break
      line = fin.readline()
      patches[patch_info] = True
      lines.append((line, patch_info))

   # :: {PatchInfo : String}
   patch_nicknames = create_patch_nicknames(patches)

   fout.write("-- Patches --\n\n")

   # Dump (Nickname -> Patch) mapping
   longest_nickname = 1
   for (patch_info, nickname) in patch_nicknames.items():
      fout.write(nickname + ": " + patch_info.author + ", " + pretty_date(patch_info.date) + "\n")
      fout.write(" * " + patch_info.short + '\n')
      for description_line in patch_info.long:
         fout.write(' ')
         fout.write(description_line)
      fout.write('\n')
      longest_nickname = max(longest_nickname, len(nickname))

   # Prefix for coalesced lines.  It is exactly as wide as a
   # right-aligned nickname plus its ':' so the content stays aligned.
   bunch_of_spaces = ' ' * (longest_nickname - 1) + '^ '

   fout.write("-- File Contents --\n\n")

   # Dump file content
   prev_patch_info = None
   for (line, patch_info) in lines:
      if patch_info == prev_patch_info:
         # Coalesce
         fout.write(bunch_of_spaces)
      else:
         prev_patch_info = patch_info
         nickname = patch_nicknames[patch_info]
         # Right-align the nickname using the leading spaces of the
         # coalescing prefix.
         padding = longest_nickname - len(nickname)
         fout.write(bunch_of_spaces[0:padding])
         fout.write(nickname)
         fout.write(':')
      fout.write(line)

# Try and create unique, easy-to-read names for each patch.  This function
# should try as hard as it can to create really good names.  Right now, it
# just uses a couple tokens from the 'author' field and appends a number to
# disambiguate.
#
# patches :: {PatchInfo : Boolean} (logically a set)
# :: {PatchInfo : String}
def create_patch_nicknames(patches, char_limit=10):
   """Create a unique, easy-to-read nickname for every patch.

   patches:    {PatchInfo : Boolean} (logically a set); only the keys
               are used, and only their 'author' attribute is read.
   char_limit: maximum number of chars in the nickname prefix.  The
               appended digits can make the nickname slightly longer.

   Returns {PatchInfo : String}.

   The prefix is the first token of the author field plus the first
   letter of the second token, with any trailing digits removed.  The
   first patch to use a prefix gets it bare; later ones (compared
   case-insensitively) get "2", "3", ... appended.  A patch whose
   prefix ends up empty is named by a number alone, starting at "2".
   """
   import re

   patch_nicknames = {}
   # :: {String : Integer}.  The empty prefix is pre-registered so a
   # patch with no usable author text never gets an empty nickname.
   nickname_counters = {"": 1}

   # '+' rather than '*': a pattern that can match the empty string
   # makes split() break the author into single characters on newer
   # Pythons.
   splitter = re.compile(r'[ .<>@]+')

   for patch_info in patches:
      # Drop the empty tokens produced by leading/trailing separators
      # (e.g. "<user@host>") so that parts[0] and parts[1] are real text.
      parts = [part for part in splitter.split(patch_info.author) if part]

      if len(parts) == 0:
         prefix = ""
      elif len(parts) == 1:
         prefix = parts[0]
      else:
         # Use first component plus first letter of second component
         prefix = parts[0] + parts[1][0]

      # Limit prefix length
      prefix = prefix[0:char_limit]

      # Strip numbers from the end of the nickname so they can't be
      # confused with the disambiguating suffix.  The emptiness check
      # covers authors that are empty or consist only of digits.
      while prefix and prefix[-1].isdigit():
         prefix = prefix[:-1]

      lcase_prefix = prefix.lower()

      if lcase_prefix in nickname_counters:
         # Some other patch is already using this nickname.
         count = nickname_counters[lcase_prefix] + 1
         suffix = str(count)
      else:
         # We're the first patch to use this nickname
         count = 1
         suffix = ""
      nickname_counters[lcase_prefix] = count

      patch_nicknames[patch_info] = prefix + suffix

   return patch_nicknames

# Reads a line and drops the trailing newline
def read_line(fin):
   """Read one line from fin and return it without its trailing newline.

   At end of input readline() yields '', which is returned as-is, so an
   empty result can mean either a blank line or end of file.
   """
   raw = fin.readline()
   return raw[:-1] if raw.endswith('\n') else raw

# Read a single patch info entry in Darcs' raw annotation output
# Return a PatchInfo object or None if there's nothing to be read.
#
# :: Maybe PatchInfo
def read_patch_info(fin):
   """Read a single patch info entry in Darcs' raw annotation output.

   Returns a PatchInfo object, or None if there's nothing to be read
   (end of input, a blank line, or a line without a '[').

   Raises ValueError if the second line of the entry does not contain
   the '**' that separates the author from the date.

   The entry consists of:
     - a line containing '[' followed by the short patch name;
     - a line whose first two characters are skipped, followed by
       "author**date", optionally terminated by ']';
     - if that ']' is missing, long description lines up to a line
       that starts with ']'.
   """
   # Short patch name
   line = read_line(fin)
   if line == '':
      return None
   open_bracket_pos = line.find('[')
   if open_bracket_pos < 0:
      return None  # end of file
   short = line[open_bracket_pos+1:]

   # Author, Date
   line = read_line(fin)
   star_star_pos = line.find('**')
   if star_star_pos < 0:
      raise ValueError("expected 'author**date' in patch info line: %r" % line)
   author = line[2:star_star_pos]  # skip the two-character line prefix
   date = line[star_star_pos+2:]

   # Long comment
   description = []
   close_bracket_pos = date.find(']')
   if close_bracket_pos >= 0:
      # If there is a close bracket, then there's no long description.
      # Just strip off the trailing ']' (and anything after it).
      date = date[:close_bracket_pos]
   else:
      # Parse long description.  Every line keeps its trailing newline,
      # including the first one, because the lines are later written
      # out verbatim; dropping it would merge the first two lines.
      while True:
         line = fin.readline()
         # Stop at the closing ']' -- or at end of input, so that a
         # truncated entry cannot loop forever.
         if line == '' or line.startswith(']'):
            break
         description.append(line[1:])  # Strip leading space

   return PatchInfo(author, date, short, description)

# When run as a script, pass the program name and the remaining
# command-line arguments to main().
if __name__ == "__main__":
   main(sys.argv[0], sys.argv[1:])