# vim: sw=4 expandtab:
""" GeniConfiguration module
"""

from string import join, strip
import re
from os import mkdir
from MiscFunctions import *
from gtesterrc import *

# Default for the ``timeout`` argument of the configuration classes below.
# GenIConfiguration.cmdFor substitutes it verbatim into GenI's ``--timeout=``
# option, which is why it is kept as a string.
# NOTE(review): the unit is presumably seconds -- confirm against GenI's CLI.
defaultTimeout = '120'

# The global statistics are meant to tell us which statistics each of our
# GenIConfigurations should know about.
def globalStatistics(batchDir):
    """Build a Statistics object describing every test case of a batch.

    For each entry returned by sortedListDir(batchDir), the file
    "<batchDir>/<entry>/sentences" is registered through
    Statistics.parseTests.  The populated Statistics object is returned.
    """
    collected = Statistics()
    for caseName in sortedListDir(batchDir):
        casePath = "%s/%s" % (batchDir, caseName)
        sentencesPath = casePath + "/sentences"
        collected.parseTests(caseName, sentencesPath)
    return collected

# ----------------------------------------------------------------------
# basic geni configuration (abstract class)
# ----------------------------------------------------------------------

class GenIConfiguration(object):
    """Base description of one way of invoking a GenI binary on test batches.

    An instance knows where its binary lives and which command-line arguments
    to pass, and keeps one Statistics object per batch it has parsed.
    Subclasses customise the command line by overriding cmdExtras(), or
    replace whole steps such as runOnBatch().
    """

    def __init__(self, id, params, binName='geni'
                , optimisations = None
                , extraArgs = None
                , timeout = defaultTimeout):
        """Create a configuration.

        id            -- identifier of the configuration; stored as a string
        params        -- object providing dirStructure.binDir, the directory
                         that holds the binaries
        binName       -- file name of the binary inside that directory
        optimisations -- list of command-line arguments appended last by
                         cmdFor(); None (the default) means no arguments
        extraArgs     -- list of command-line arguments appended by cmdFor()
                         and reported by getConfiguration(); None (the
                         default) means no arguments
        timeout       -- value substituted into the --timeout= option
        """
        # A literal [] default would be one list object shared by every
        # instance created without these arguments, so use None as the
        # "not given" marker and create a fresh list per instance.
        if optimisations is None:
            optimisations = []
        if extraArgs is None:
            extraArgs = []
        self.__id = str(id)
        self.__statisticsByBatch = {}
        self.__binDir = params.dirStructure.binDir
        self.__binLocation = '%s/%s' % (self.__binDir, binName)
        self.__optimisations = optimisations
        self.__extraArgs = extraArgs
        self.__timeout   = timeout

    def id(self):
        """Return the identifier of this configuration (a string)."""
        return self.__id

    def baseCommand(self):
        """Return the path of the binary, "<binDir>/<binName>"."""
        return self.__binLocation

    def requiredBinaries(self):
        """Return the list of binaries this configuration needs."""
        return [self.__binLocation]

    def requiredFiles(self):
        """Return the list of extra files this configuration needs (none)."""
        return []

    def statsForBatch(self, batch):
        """Return the Statistics stored for batch; KeyError if there is none."""
        return self.__statisticsByBatch[batch]

    def sortedStatsKeys(self):
        """Return the batches that have statistics, as a sorted list."""
        return sorted(self.__statisticsByBatch.keys())

    def hasPreprocessor(self):
        """Return False: the base configuration has no preprocessor."""
        return False

    def runOnBatch(self, batchDir, batchResponseDir):
        """Call run() once for every entry of sortedListDir(batchDir)."""
        # rollBar is called before the loop and once per test case
        # (presumably a progress indicator -- it is defined elsewhere).
        rollBar()
        for testFile in sortedListDir(batchDir):
            rollBar()
            self.run(batchDir, batchResponseDir, testFile)

    def cmdExtras(self, caseDir, caseResponseDir, testCase):
        """Hook for subclasses: extra arguments for one test case (none here)."""
        return []

    def cmdFor(self, caseDir, caseResponseDir, testCase):
        """Return the full GenI command line for one test case, as a list.

        The order is: binary and fixed options, then cmdExtras(), then the
        extraArgs and finally the optimisations given to the constructor.
        """
        return [ '"%s"' % self.baseCommand(),
                 '"--nogui"',
                 '--metrics="%s"' % ' '.join(STANDARD_METRICS),
                 '"--statsfile=%s/stats"' % caseResponseDir,
                 '--timeout=%s' % self.__timeout,
                 '+RTS -K100M -RTS'
                ] + self.cmdExtras(caseDir, caseResponseDir, testCase) + self.__extraArgs + self.__optimisations

    def run(self, batchDir, batchResponseDir, testCase):
        """Run GenI on a single test case, under the external "time" command.

        Creates the directory "<batchResponseDir>/<testCase>" (mkdir raises
        if that fails) and hands the command to systemOrWarn, passing
        "<dir>/responses" as its stdout argument and "<dir>/time" as its
        stderr argument.
        """
        print("Running case %s" % testCase)
        caseDir         = "%s/%s" % (batchDir, testCase)
        caseResponseDir = "%s/%s" % (batchResponseDir, testCase)
        mkdir(caseResponseDir)
        timeArgs        = ["-p"]
        # NOTE(review): unlike 'response' below, this path is wrapped in
        # double quotes -- confirm that systemOrWarn expects that.
        timeResponse    = '"%s/time"' % caseResponseDir
        cmd  = self.cmdFor(caseDir, caseResponseDir, testCase)
        response = '%s/responses' % caseResponseDir
        geniExits = [2] # 2 is GenI's timeout code
        systemOrWarn("time", timeArgs + cmd, acceptedExits=geniExits, tries=5, stdout = response, stderr = timeResponse)

    def getConfiguration(self):
        """Return a list of (label, value) pairs describing this configuration."""
        return  [ ("Version", "0.10")
                , ("Extra parameters", " ".join(self.__extraArgs)) ]

    def parseBatchOutputFiles(self, batch, batchDir, batchResponseDir):
        """Read the timing, response and stats files of every case in a batch.

        A fresh statistics object replaces whatever was stored for batch.
        The working directory is changed to batchResponseDir while parsing
        and restored afterwards, even when parsing raises.
        """
        initialDir = os.getcwd()
        chdirOrDie(batchResponseDir)
        try:
            # Create the statistics object for this number of clauses
            self.addNewStatsToBatch(batch)
            batchStats = self.statsForBatch(batch)
            for testCase in sortedListDir(batchDir):
                caseDir         = "%s/%s" % (batchDir, testCase)
                caseResponseDir = "%s/%s" % (batchResponseDir, testCase)
                batchStats.parseTime(testCase, caseResponseDir + '/time')
                #batchStats.parseTime(testCase, caseResponseDir + '/xmgchatter', prefix="lex")
                batchStats.parseResponses(testCase, caseDir + "/sentences", caseResponseDir + "/responses", caseResponseDir + "/stats")
        finally:
            # do not leave the process stranded in the response directory
            chdirOrDie(initialDir)

    def addNewStatsToBatch(self, batch):
        """Store a new, empty statistics object for batch (replacing any old one)."""
        self.__statisticsByBatch[batch] = self.newStatistics()

    def addNewColumnToStatsFor(self, batch):
        """Call addNewColumn() on the statistics stored for batch."""
        self.__statisticsByBatch[batch].addNewColumn()

    def newStatistics(self):
        """Factory for the statistics object; subclasses may override it."""
        return Statistics()

# ----------------------------------------------------------------------
# vanilla geni configuration
# ----------------------------------------------------------------------

class GenIBasic(GenIConfiguration):
    """GenI run with an explicit macros file and lexicon for every test case."""

    def __init__(self, macros, lexicon, id, params, binName='geni'
                , optimisations = None
                , extraArgs = None
                , timeout = defaultTimeout ):
        """Create the configuration.

        macros  -- value passed to GenI's -m option
        lexicon -- value passed to GenI's -l option
        The remaining arguments are forwarded to GenIConfiguration; None for
        optimisations or extraArgs (the default) means "no arguments".
        """
        self.__macros  = macros
        self.__lexicon = lexicon
        # Hand the base class a fresh list per instance rather than one
        # default list object shared by every instance.
        if optimisations is None:
            optimisations = []
        if extraArgs is None:
            extraArgs = []
        super(GenIBasic, self).__init__( id, params, binName, optimisations, extraArgs, timeout )

    def cmdExtras(self, caseDir, caseResponseDir, testCase):
        """Return the inherited extras followed by the -s, -m and -l options.

        -s points at "<caseDir>/semantics"; -m and -l carry the macros and
        lexicon given to the constructor.
        """
        newExtra = [ '-s "%s/semantics"' % caseDir
                   , '-m "%s"'    % self.__macros
                   , '-l "%s"'    % self.__lexicon ]
        return super(GenIBasic, self).cmdExtras(caseDir, caseResponseDir, testCase) + newExtra

# ----------------------------------------------------------------------
# precompiled geni configuration
#
# uses a precompiled grammar
# ----------------------------------------------------------------------

class GenIPrecompiled(GenIConfiguration):
    """GenI run through the 'geni-precompiled' binary, so no macros file is passed."""

    def __init__(self, lexicon, id, params, binName='geni-precompiled'
                , optimisations = None
                , extraArgs = None
                , timeout = defaultTimeout ): # same default timeout as the other configurations
        """Create the configuration.

        lexicon -- value passed to GenI's -l option
        The remaining arguments are forwarded to GenIConfiguration; None for
        optimisations or extraArgs (the default) means "no arguments".
        """
        self.__lexicon = lexicon
        # Hand the base class a fresh list per instance rather than one
        # default list object shared by every instance.
        if optimisations is None:
            optimisations = []
        if extraArgs is None:
            extraArgs = []
        super(GenIPrecompiled, self).__init__( id, params, binName, optimisations, extraArgs, timeout )

    def cmdExtras(self, caseDir, caseResponseDir, testCase):
        """Return the inherited extras followed by the -s and -l options.

        -s points at "<caseDir>/semantics"; -l carries the lexicon given to
        the constructor.
        """
        newExtra = [ '-s "%s/semantics"' % caseDir
                   , '-l "%s"'    % self.__lexicon ]
        return super(GenIPrecompiled, self).cmdExtras(caseDir, caseResponseDir, testCase) + newExtra

# ----------------------------------------------------------------------
# batch geni configuration
#
# disables ability to individually run test cases
# ----------------------------------------------------------------------

class GenIBatch(GenIConfiguration):
    """GenI run once over a whole test suite through its --batchdir option.

    Individual test cases cannot be run with this configuration: run()
    aborts the program.
    """

    def __init__( self, macros, lexicon, suite
                , id, params, binName='geni'
                , optimisations = None
                , extraArgs = None
                , timeout = defaultTimeout ):
        """Create the configuration.

        macros  -- value passed to GenI's -m option
        lexicon -- value passed to GenI's -l option
        suite   -- value passed to GenI's -s option
        The remaining arguments are forwarded to GenIConfiguration; None for
        optimisations or extraArgs (the default) means "no arguments".
        """
        # Use a fresh list per instance rather than one default list object
        # shared by every instance.
        if optimisations is None:
            optimisations = []
        if extraArgs is None:
            extraArgs = []
        self.__macros  = macros
        self.__lexicon = lexicon
        self.__suite   = suite
        # The base class keeps its copies under its own private (name-mangled)
        # attributes, so this class stores the two lists again for runOnBatch.
        self.__extraArgs = extraArgs
        self.__optimisations = optimisations
        super(GenIBatch, self).__init__( id, params, binName
                                       , optimisations, extraArgs, timeout )

    def runOnBatch(self, batchDir, batchResponseDir):
        """Run GenI a single time for the whole batch.

        The command is built here rather than through cmdFor(); results are
        written by GenI under batchResponseDir via --batchdir.
        NOTE(review): no --timeout or --statsfile option is passed here, so
        the constructor's timeout does not reach the command line -- confirm
        that this is intended.
        """
        # don't run the cases individually just do a big batch operation
        timeArgs        = ["-p"]
        cmd  = [ '"%s"' % self.baseCommand()
               , '"--nogui"'
               , '--metrics="%s"' % ' '.join(STANDARD_METRICS)
               , '+RTS -K100M -RTS'
               , '--batchdir="%s"' % batchResponseDir
               , '-m "%s"'    % self.__macros
               , '-l "%s"'    % self.__lexicon
               , '-s "%s"'    % self.__suite
               ] + self.__extraArgs + self.__optimisations
        # no stdout/stderr arguments are given: the timing output is not kept
        systemOrWarn("time", timeArgs + cmd, tries=2)

    def run(self, batchDir, batchResponseDir, testCase):
        """Not supported in batch mode: print a message and exit with status 1."""
        print("Not expecting GenIBatch.run to be called!")
        sys.exit(1)

    def parseBatchOutputFiles(self, batch, batchDir, batchResponseDir):
        """Read the response and stats files of every case in a batch.

        Same as the base-class parsing, but with no timing data: every time
        metric of every test case is recorded as 0.  The working directory
        is changed to batchResponseDir while parsing and restored afterwards,
        even when parsing raises.
        """
        initialDir = os.getcwd()
        chdirOrDie(batchResponseDir)
        try:
            # Create the statistics object for this number of clauses
            self.addNewStatsToBatch(batch)
            batchStats = self.statsForBatch(batch)
            for testCase in sortedListDir(batchDir):
                caseDir         = "%s/%s" % (batchDir, testCase)
                caseResponseDir = "%s/%s" % (batchResponseDir, testCase)
                batchStats.parseResponses(testCase, caseDir + "/sentences", caseResponseDir + "/responses", caseResponseDir + "/stats")
                # empty timing results
                batchStats.timingResults[testCase] = {}
                for k in batchStats.timeMetrics:
                    batchStats.timingResults[testCase][k] = 0
        finally:
            # do not leave the process stranded in the response directory
            chdirOrDie(initialDir)


# ----------------------------------------------------------------------
# reading statistics in
# ----------------------------------------------------------------------

class Statistics:
    """Per-test-case record of expected sentences, GenI responses, metrics and timings."""

    # Largest plain integer: sys.maxint where it exists (Python 2), otherwise
    # sys.maxsize.
    INFINITY       = getattr(sys, 'maxint', sys.maxsize)

    # A line of the timing file containing this text marks a timed-out run.
    __timeoutRe  = re.compile('timed out after')
    # "key : value" pair in a stats file; group 1 is the key, group 2 the value.
    __statsKeyRe = re.compile(r'(\S*)\s*:\s*(\S*)')

    def __init__(self):
        self.timingResults = {}  # test case -> {time metric: seconds}
        self.tests    = []       # test cases, in the order they were parsed
        self.metrics  = []  # used only to sort statistical output in the order given to GenI
        self.failList = {}       # test case -> expected sentences not received
        self.passList = {}       # test case -> expected sentences received
        self.overgenList = {}    # test case -> received sentences not expected
        self.expectedList = {}   # test case -> all expected sentences
        self.statsList = {}      # test case -> {metric name: value string}
        self.responseList = {}   # test case -> all non-empty received lines
        self.died = {}           # test case -> True if it timed out or its output is unreadable
        self.timeMetrics = [ 'real', 'user', 'sys' ]
        # every metric is tried against both supported line formats
        self.__timeParsers = [(p,self.compileTimeParser(p)) for p in self.timeMetrics ]\
                             + [(p,self.compileTimeParser2(p)) for p in self.timeMetrics ]

    def compileTimeParser(self,key):
        """Return a regexp for lines like "real 1.23"; group 1 is the seconds."""
        return re.compile(r'^%s\s*(\d+\.\d+)\s*$' % key)

    def compileTimeParser2(self,key):
        """Return a regexp for lines like "real 1m2.345s".

        Group 1 is the whole minutes and group 2 the seconds.  The minutes
        used to be hard-coded as "0m", which silently dropped every timing of
        one minute or more.
        """
        return re.compile(r'^%s\s*(\d+)m(\d+\.\d+)\s*s$' % key)

    def addNewColumn(self):
        """Append None to the realtime, usertime, systime and answer lists.

        NOTE(review): none of these four attributes is created anywhere in
        this class, so this raises AttributeError unless they were assigned
        from outside first -- confirm whether this method is still needed.
        """
        self.realtime.append(None)
        self.usertime.append(None)
        self.systime.append(None)
        self.answer.append(None)

    def parseTime(self, testCase, timeResponseFile, prefix=""):
        """Read the timings of testCase from timeResponseFile.

        Every line matching one of the time formats is stored, in seconds, as
        timingResults[testCase][prefix + metric].  A line containing
        "timed out after" sets died[testCase] to True.  If the file cannot be
        read, a message is printed and nothing is recorded.
        """
        try:
            f = open(timeResponseFile, 'r')
            try:
                lines = f.readlines()
            finally:
                f.close()
        except (IOError, OSError):
            print("Error parsing '%s'" % timeResponseFile)
            return

        timings = self.timingResults.setdefault(testCase, {})
        # Walk the file from the last line to the first, so that when a metric
        # appears several times the earliest occurrence is the one kept.
        for line in reversed(lines):
            if self.__timeoutRe.search(line) is not None:
                self.died[testCase] = True
            for (key, parser) in self.__timeParsers:
                found = parser.search(line)
                if found is None:
                    continue
                fields = found.groups()
                seconds = float(fields[-1])
                if len(fields) > 1:
                    # minutes-and-seconds format: fold the minutes in
                    seconds += 60 * int(fields[0])
                timings[prefix + key] = seconds

    def parseTests(self, testCase, expectedFile):
        """Register testCase and load its expected sentences from expectedFile.

        Also resets statsList[testCase] to an empty dictionary.  Errors while
        reading expectedFile are not caught.
        """
        expected = self.swallowFile(expectedFile)
        self.tests += [testCase]
        self.expectedList[testCase] = expected
        self.statsList[testCase] = {}

    def parseResponses(self, testCase, expectedFile, responseFile, statsFile):
        """Compare the sentences GenI produced with the expected ones.

        Fills responseList, failList, passList, overgenList and statsList for
        testCase.  If responseFile or statsFile cannot be read, the test case
        counts as died: nothing was received and no statistics are stored.
        An existing died[testCase] entry (e.g. from parseTime) is kept.
        """
        self.parseTests(testCase, expectedFile)
        expected = self.expectedList[testCase]

        try:
            received = [r for r in self.swallowFile(responseFile) if r != ""]
            stats    = self.swallowFile(statsFile)
            isDied = False
        except (IOError, OSError):
            isDied = True
            received = []
            stats    = []

        self.responseList[testCase] = received
        # sets make the membership tests cheap; the lists keep the file order
        receivedSet = set(received)
        expectedSet = set(expected)
        # compares expected results with the responses GenI returns
        #  - a list of sentences which fail (expected, not received)
        self.failList[testCase] = [s for s in expected if s not in receivedSet]
        #  - a list of sentences which pass (expected, received)
        self.passList[testCase] = [s for s in expected if s in receivedSet]
        #  - a list of sentences which are overgenerated (not expected, received)
        self.overgenList[testCase] = [s for s in received if s not in expectedSet]
        if testCase not in self.died:
            self.died[testCase] = isDied

        if not isDied:
            # - a list of key/value tuples for the statistical info
            # The same search() result is used for the test and the
            # extraction: match() only looks at the start of the line and
            # returns None for lines such as "chart size: 3".
            pairs = []
            for line in stats:
                found = self.__statsKeyRe.search(line)
                if found is not None:
                    pairs.append((found.group(1), found.group(2)))
            # create the list of metrics if it is empty
            if len(self.metrics) == 0:
                self.metrics.extend([key for (key, value) in pairs])
            # fill in the statistics list
            for (key, value) in pairs:
                self.statsList[testCase][key] = value

    def swallowFile(self, file):
        """Return the lines of file, each stripped of surrounding whitespace.

        Raises whatever open() raises when the file cannot be read.
        """
        f = open(file, 'r')
        try:
            return [line.strip() for line in f.readlines()]
        finally:
            f.close()

