#! /usr/bin/env python
#---------------------------------------------------------------------
# Darcs Reannotater
#
# The annotation output produced by Darcs is difficult to read.  This
# program converts the Darcs annotation output to something similar to
# CVS's annotation output.
#
# This isn't intended to be a robust tool.  It doesn't do much error
# checking.  Its original purpose was to make it easy to try out
# different ideas for a better "darcs annotate" format.
#
# Send feedback and patches to the author.
#
# The script runs unmodified on Python 2.6+ and Python 3.
#
#---------------------------------------------------------------------
# Usage
#
# This program processes the output of "darcs annotate".
#
# Using Darcs output directly:
#   darcs annotate File.txt | ./darcs-reannotate
#
# Reading from files:
#   darcs annotate File1.txt > File1.ann
#   darcs annotate File2.txt > File2.ann
#   ./darcs-reannotate File1.ann File2.ann
#
# Output always goes to stdout.
#
#---------------------------------------------------------------------
# What It Does
#
# The program reads in all the lines and maintains a set of all the
# involved patches.  It then creates a unique "nickname" for each
# patch.  Ideally, the nickname is short and meaningful.  This is done
# in the "create_patch_nicknames(...)" function.
#
# Then, all the patches are printed out along with their associated
# nicknames.  This is followed by contents of the revision-controlled
# file, with each line prefixed with the patch nickname.
#
#---------------------------------------------------------------------
# Known Issues
#
# This program fails on certain valid inputs.  Darcs' annotation
# output uses a special format when describing changes from the latest
# patchset in the repo.  This program doesn't understand that format
# yet.  This should only happen when annotating files that are touched
# by the latest patchset.  To work around this issue, you can record a
# new temporary patchset that doesn't affect any of the files you want
# to annotate.  I think the proper way to fix this is to change Darcs
# to (optionally) eliminate special treatment of the latest patch.
#
# The date parsing library routine doesn't seem to recognize many time
# zones (at least, when run on my machine).  There's a hack in there
# to recognize some of the time zones used in Darcs' own repo (look at
# the "INPUT_TIME_FORMATS" array).
#
#---------------------------------------------------------------------

import re
import sys
import time


def print_usage_info(prog_name, out):
    """Write the usage message to the stream `out`.

    prog_name :: String   name shown in the example command lines
    out       :: stream   any object with a write(str) method
    """
    out.write("""
This program processes the output of "darcs annotate".

Using darcs output directly:
  darcs annotate File.txt | %s

Reading from files:
  darcs annotate File1.txt > File1.ann
  darcs annotate File2.txt > File2.ann
  %s File1.ann File2.ann

Output always goes to stdout.
""" % (prog_name, prog_name))


def main(prog_name='darcs-reannotate', args=None):
    """Command-line entry point.

    prog_name :: String     name used in the usage message
    args      :: [String]   command-line arguments (without the program
                            name); None is treated as an empty list

    With no arguments the annotation is read from stdin.  Otherwise each
    argument is a file to convert; the first '-' or '--' argument is
    skipped and ends option processing.  A file that cannot be opened is
    reported on stderr and the remaining files are still processed.
    """
    if args is None:
        args = []

    # Quick check for '-h' or '--help'.  Anything after '-' or '--' is
    # a file name, so stop looking once one of those is seen.
    for arg in args:
        if arg in ('-h', '--help'):
            print_usage_info(prog_name, sys.stdout)
            return
        if arg in ('-', '--'):
            break

    if not args:
        # Read from stdin
        try:
            convert(sys.stdin, sys.stdout)
        except KeyboardInterrupt:
            pass
        return

    # Read the files named on the command line
    look_for_dashes = True
    for file_name in args:
        if look_for_dashes and file_name in ('-', '--'):
            look_for_dashes = False
            continue
        # Only the open() is guarded, so the message below is never
        # printed for an unrelated I/O error raised during conversion.
        try:
            in_file = open(file_name)
        except IOError as err:
            sys.stderr.write("Couldn't open %s for reading: %s\n"
                             % (file_name, err.strerror))
            continue
        try:
            convert(in_file, sys.stdout)
        finally:
            in_file.close()


class PatchInfo(object):
    """Identity and description of one Darcs patch.

    Fields:
      author :: String
      date   :: String     raw date text as it appears in the annotation
      short  :: String     one-line patch name
      long   :: [String]   lines of the long description (may be empty)

    Two PatchInfo objects are equal when author, date and short match.
    'long' is deliberately left out of the identity (not sure if Darcs
    considers 'long' part of the patch identity).
    """

    def __init__(self, author, date, short, long=None):
        # A fresh list per instance; a shared default list would leak
        # description lines from one patch into another.
        if long is None:
            long = []
        assert isinstance(author, str)
        assert isinstance(short, str)
        assert isinstance(long, list)
        self.author = author
        self.date = date
        self.short = short
        self.long = long
        # Computed once; the identity fields are not expected to change.
        self.saved_hash = hash((author, date, short))

    def __hash__(self):
        return self.saved_hash

    def __eq__(self, other):
        if not isinstance(other, PatchInfo):
            return False
        return (self.author == other.author and
                self.date == other.date and
                self.short == other.short)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        return "PatchInfo(%r, %r, %r, %r)" % (
            self.author, self.date, self.short, self.long)


INPUT_TIME_FORMATS = [
    "%Y%m%d%H%M%S",
    "%a %b %d %H:%M:%S %Z %Y",
    # For some reason, many time zones aren't recognized when this
    # program is run on my machine.  This is a hack to have those time
    # zones included, even though the proper time zone conversion won't
    # take place.
    "%a %b %d %H:%M:%S EST %Y",
    "%a %b %d %H:%M:%S EDT %Y",
    "%a %b %d %H:%M:%S CEST %Y",
]


def pretty_date(date_string):
    """Reformat a Darcs date for display.

    date_string :: String
                :: String

    Each entry of INPUT_TIME_FORMATS is tried in order; the first one
    that parses is re-rendered with "%x %X".  If none of them matches,
    the input string is returned unchanged.
    """
    for parse_format in INPUT_TIME_FORMATS:
        try:
            timestamp = time.strptime(date_string, parse_format)
        except ValueError:
            continue
        return time.strftime("%x %X", timestamp)
    # Unable to parse.  Just return the input string.
    return date_string


def convert(fin, fout):
    """Convert raw "darcs annotate" output into a friendlier listing.

    fin  :: input stream with Darcs' raw annotation output
    fout :: output stream for the friendly annotation output

    The output is a "-- Patches --" section listing every patch with
    its nickname, followed by a "-- File Contents --" section in which
    each line is prefixed with the nickname of its patch.  Consecutive
    lines from the same patch show a '^' marker instead of the nickname.
    Patches are listed, and nicknames numbered, in order of first
    appearance in the input.
    """
    patch_order = []  # :: [PatchInfo]  distinct patches, first-seen order
    seen = set()      # :: set of PatchInfo already in patch_order
    lines = []        # :: [(Line, PatchInfo)]

    # Read and ignore file-creation patch
    if read_patch_info(fin) is None:
        sys.stderr.write("Error: expected file creation patch info.\n")
        return

    # Read in all the lines.  Each patch info entry is followed by the
    # file line it annotates.
    while True:
        patch_info = read_patch_info(fin)
        if patch_info is None:
            break
        line = fin.readline()
        if patch_info not in seen:
            seen.add(patch_info)
            patch_order.append(patch_info)
        lines.append((line, patch_info))

    # :: {PatchInfo : String}
    patch_nicknames = create_patch_nicknames(patch_order)

    # Dump (Nickname -> Patch) mapping
    fout.write("-- Patches --\n\n")
    longest_nickname = 1
    for patch_info in patch_order:
        nickname = patch_nicknames[patch_info]
        fout.write(nickname + ": " + patch_info.author + ", " +
                   pretty_date(patch_info.date) + "\n")
        fout.write(" * " + patch_info.short + '\n')
        for long_line in patch_info.long:
            fout.write(' ')
            fout.write(long_line)
            fout.write('\n')
        longest_nickname = max(longest_nickname, len(nickname))

    # Prefix for a line that belongs to the same patch as the previous
    # line.  Same width as a right-aligned nickname plus its ':'.
    same_patch_prefix = ' ' * (longest_nickname - 1) + '^ '

    # Dump file content
    fout.write("-- File Contents --\n\n")
    prev_patch_info = None
    for (line, patch_info) in lines:
        if patch_info == prev_patch_info:
            # Coalesce
            fout.write(same_patch_prefix)
        else:
            prev_patch_info = patch_info
            nickname = patch_nicknames[patch_info]
            # Right-align the nickname
            fout.write(' ' * (longest_nickname - len(nickname)))
            fout.write(nickname)
            fout.write(':')
        fout.write(line)


# Separators between the tokens of an author field such as
# "Jane Doe <jane@example.com>".  The '+' keeps the pattern from
# matching the empty string, which would split between every character
# on Python 3.7+.
_AUTHOR_SPLITTER = re.compile(r'[ .<>@]+')


def create_patch_nicknames(patches):
    """Create a unique, easy-to-read nickname for each patch.

    This function should try as hard as it can to create really good
    names.  Right now, it just uses a couple of tokens from the 'author'
    field and appends a number to disambiguate.

    patches :: iterable of PatchInfo (a list, a set, or a dict whose
               keys are PatchInfo objects)
            :: {PatchInfo : String}

    Nicknames are unique ignoring case.  The first patch to get a given
    prefix uses it bare; later ones get "2", "3", ... appended in
    iteration order.
    """
    patch_nicknames = {}
    # :: {String : Integer}  lower-cased prefix -> how many times used.
    # The empty prefix starts out "used" so that no patch ever ends up
    # with an empty nickname.
    nickname_counters = {"": 1}

    # Maximum number of chars in prefix.  The appended digits could make
    # the nickname slightly longer, though.
    char_limit = 10

    for patch_info in patches:
        if patch_info in patch_nicknames:
            # Same patch listed twice; keep its first nickname.
            continue

        # Drop the empty strings that split() yields when the author
        # field starts or ends with a separator.
        parts = [part for part in _AUTHOR_SPLITTER.split(patch_info.author)
                 if part]
        if not parts:
            prefix = ""
        elif len(parts) == 1:
            prefix = parts[0]
        else:
            # Use first component plus first letter of second component
            prefix = parts[0] + parts[1][0]

        # Limit prefix length
        prefix = prefix[:char_limit]

        # Strip numbers from the end of the prefix so that it cannot be
        # confused with another prefix plus a disambiguating number.
        # This may leave the prefix empty.
        while prefix and prefix[-1].isdigit():
            prefix = prefix[:-1]

        lcase_prefix = prefix.lower()
        if lcase_prefix in nickname_counters:
            # Some other patch is already using this nickname.
            count = nickname_counters[lcase_prefix] + 1
            suffix = str(count)
        else:
            # We're the first patch to use this nickname
            count = 1
            suffix = ""
        nickname_counters[lcase_prefix] = count

        patch_nicknames[patch_info] = prefix + suffix

    return patch_nicknames


def read_line(fin):
    """Read one line from `fin` and drop the trailing newline.

    Returns '' both for an empty line and at end of input.
    """
    line = fin.readline()
    if line.endswith('\n'):
        line = line[:-1]
    return line


def read_patch_info(fin):
    """Read a single patch info entry in Darcs' raw annotation output.

    Returns a PatchInfo object, or None if there's nothing to be read
    (end of input, an empty line, or a line without '[').

    :: Maybe PatchInfo

    Layout, as this parser reads it:
      line 1:  anything, then '[' followed by the short patch name
      line 2:  two characters that are skipped, then author '**' date;
               a ']' after the date means there is no long description
      then:    long description lines, each with one leading character
               that is dropped, ended by a line that starts with ']'

    Raises ValueError if the second line has no '**'.
    """
    # Short patch name
    line = read_line(fin)
    if line == '':
        return None
    open_bracket_pos = line.find('[')
    if open_bracket_pos < 0:
        return None
    short = line[open_bracket_pos + 1:]

    # Author, Date
    line = read_line(fin)
    star_star_pos = line.find('**')
    if star_star_pos < 0:
        raise ValueError(
            "Malformed patch info: expected 'author**date' but got %r"
            % line)
    author = line[2:star_star_pos]
    date = line[star_star_pos + 2:]

    # Long comment
    long_lines = []
    close_bracket_pos = date.find(']')
    if close_bracket_pos >= 0:
        # If there is a close bracket, then there's no long description.
        # Just strip off the trailing ']' (and whatever follows it).
        date = date[:close_bracket_pos]
    else:
        # Parse long description.  The raw readline() result is tested
        # so that end of input ('') can be told apart from a blank
        # description line ('\n'); without that check a truncated input
        # would loop forever.
        while True:
            raw = fin.readline()
            if raw == '' or raw.startswith(']'):
                break
            if raw.endswith('\n'):
                raw = raw[:-1]
            long_lines.append(raw[1:])  # Strip leading space

    return PatchInfo(author, date, short, long_lines)


if __name__ == "__main__":
    main(sys.argv[0], sys.argv[1:])