#!/usr/bin/env python

__author__ = "Martin Sandve Alnes and Johan Hake"
__licence__ = "LGPL v3"
__copyright__ = "Copyright (2009) Martin Sandve Alnes"
__date__ = "2008-12-10 -- 2009-03-20"

import os
import subprocess

# Taken from http://ivory.idyll.org/blog/mar-07/replacing-commands-with-subprocess
from subprocess import Popen, PIPE, STDOUT
def run_command(cmd, cwd=None, env=None, stdout=None):
    """Run a shell command, wait for it to finish and return its exit status.

    @param cmd:
        The command line (a string). It is interpreted by the shell, so
        it must come from a trusted source.
    @param cwd:
        Optional directory to run the command in.
    @param env:
        Optional environment mapping for the child process.
    @param stdout:
        Optional file object or file descriptor receiving the output of
        the command. If None, the child inherits the stdout of this process.
    @return:
        The exit status of the command.

    The stderr stream of the command is always merged into its stdout.
    """
    # NOTE: subprocess.STDOUT is only meaningful as a value for stderr;
    # passing stdout=None is what makes the child inherit our stdout.
    pipe = Popen(cmd, shell=True, cwd=cwd, env=env, stdout=stdout, stderr=STDOUT)

    # Wait for the process to terminate. Nothing is captured here since
    # the output goes directly to the chosen stdout target.
    pipe.communicate()
    return pipe.returncode

# Template for the bash script sent to qsub.
# submit() fills it in with %-formatting from a dict with these keys:
#   pbs     -- the "#PBS ..." directive lines
#   workdir -- the directory the script changes into before running the job
#   paths   -- a shell line extending $PATH (or an empty string)
#   setup   -- additional user supplied script contents run before the job
#   job     -- the job command itself
jobscript = """#!/bin/bash
%(pbs)s

# enter the job-directory
cd %(workdir)s

# add /usr/local/bin in path
PATH=$PATH:/usr/local/bin

# setup environment
%(paths)s
%(setup)s

# execute job
%(job)s
"""

def check_mem_argument(mem):
    """Check that a memory specification is None or looks like '100mb'.

    A valid specification is one or more decimal digits directly followed
    by one of the suffices 'kb', 'mb' or 'gb'. Signs, whitespace and
    decimal points are rejected since the string is later pasted verbatim
    into the resource list passed to the queue system.

    @param mem:
        None or a string.
    @return:
        True if mem is None or a valid specification, otherwise False.
    """
    allowed_mem_suffices = ("kb", "mb", "gb")
    if mem is None:
        return True
    assert isinstance(mem, str)

    # Need at least one digit in front of the two letter suffix
    if len(mem) < 3:
        return False
    amount, suffix = mem[:-2], mem[-2:]
    if suffix not in allowed_mem_suffices:
        return False

    # Only plain ASCII digits are accepted; int() would also let through
    # strings like ' 4', '+4' or '-0'.
    return all(ch in "0123456789" for ch in amount)

def sjoin(*items):
    "Return the concatenation of str(item) for every given argument."
    pieces = [str(item) for item in items]
    return "".join(pieces)

def _format_walltime(hours):
    "Format a walltime given in (possibly fractional) hours as 'H:MM:SS'."
    total_seconds = int(round(hours * 3600))
    total_minutes, seconds = divmod(total_seconds, 60)
    whole_hours, minutes = divmod(total_minutes, 60)
    return "%d:%02d:%02d" % (whole_hours, minutes, seconds)

def _serial_from_environment():
    "Return True if the environment variable PJOBS_SERIAL asks for serial runs."
    value = os.environ.get("PJOBS_SERIAL", "")
    # Any non-empty value except the usual spellings of 'off' enables serial mode
    return value.strip().lower() not in ("", "0", "false", "no", "off")

def _pbs_header(jobname, nodes, ppn, walltime, mem, vmem, email, keep_environment):
    "Build the '#PBS ...' directive lines of the job script for a single job."
    resources = "-lnodes=%d:ppn=%d,walltime=%s" % (nodes, ppn, _format_walltime(walltime))
    if mem:
        resources += ",mem=" + mem
    if vmem:
        resources += ",vmem=" + vmem

    # "-j oe" merges stderr of the job into its stdout file
    directives = ["-j oe", resources]
    if email:
        directives.append("-m abe")
        directives.append("-M %s" % email)
    directives.append("-N %s" % jobname)
    if keep_environment:
        directives.append("-V")
    return "\n".join("#PBS %s" % directive for directive in directives)

def submit(jobs, nodes=1, ppn=1, walltime=3, mem=None, vmem=None, name="myjob", workdir="$PBS_O_WORKDIR", email=None, paths=(), setup="", keep_environment=False, serial=None, nice=None, dryrun=False): 
    """Submit a sequence of commands to the pbs queue using qsub.
    
    Example use:
        from pjobs import submit
        jobs = []
        for n in (10,20,30):
            for h in (0.1, 0.01, 0.005):
                dt = 0.1*h
                jobs.append("pmpirun.openmpi mysimulator -n%d -h%f -dt%f" % (n, h, dt))
        submit(jobs, nodes=4, ppn=8, walltime=24*7)
    
    For each job a script named run_<jobname>.sh is written to the
    current directory and passed to qsub. The script is removed again
    after qsub has been called, but it is kept in a dry run.
    In a serial run each job is instead executed directly in workdir
    with its output written to the file <jobname>_stdout.
    
    Arguments:
    @param jobs:
        A list of commands (strings) to pass to the queue for separate execution.
        A single string is treated as a list with one command.
    @param nodes:
        Number of compute nodes to use.
        Default is 1 node.
    @param ppn:
        Number of processors per node to use.
        Default is 1 processor.
    @param walltime:
        Maximum amount of real-life time the job can use, in hours.
        Fractions of an hour are allowed, e.g. 0.5 for 30 minutes.
        Default is 3.
    @param mem:
        Optional maximum amount of physical memory the process will need.
        The syntax is mem='100mb', or mem='4gb', with allowed suffices 
        one of 'kb', 'mb' or 'gb' and only int values.
    @param vmem:
        Optional maximum amount of virtual memory the process will need.
        For syntax see mem.
    @param name:
        Basename of the job, just for the queue system.
        If a single string this will be combined with a
        counter to separate multiple jobs. A list or
        tuple of strings can be provided instead to
        define separate names for each job, in which case
        it must contain at least one name per job.
        Default is 'myjob'.
    @param email:
        Your email address if you want email when
        the jobs are started and stopped.
    @param workdir:
        The directory the jobs will be run in.
        Default is the place you submit the job from
        ($PBS_O_WORKDIR or . in a serial run).
    @param paths:
        A list of directories to add to the $PATH.
        A single string is treated as a list with one directory.
    @param setup:
        Optional additional job script contents for setting 
        up e.g. environment variables before each job.
    @param keep_environment:
        True if you want to copy all current environment
        variables to the job environment (PBS -V option).
        Default False.
    @param serial:
        True if you want to run the jobs as a serial sequence of processes
        instead of passing them to the qsub queue.
        Default is False, unless the environment variable PJOBS_SERIAL=1 
        is defined in which case the default is True.
        (PJOBS_SERIAL set to an empty value, 0, false, no or off
        counts as not defined.)
    @param nice:
        Optional nice value (int) used for all jobs when running in serial.
        If provided, "nice -n %d" % nice is added before each job command.
    @param dryrun:
        True if you want to test this script but not send jobs to the queue.
    
    Raises TypeError if mem or vmem has the wrong format, and
    ValueError if fewer names than jobs are provided.
    """
    if isinstance(jobs, str):
        jobs = [jobs]
    # Make sure len() works and jobs can be iterated more than once
    jobs = list(jobs)
    assert all(isinstance(job, str) for job in jobs)
    assert isinstance(nodes, int)
    assert isinstance(ppn, int)
    assert isinstance(walltime, (int, float))
    if not check_mem_argument(mem):
        raise TypeError("Wrong format of mem attribute, see docstring")
    if not check_mem_argument(vmem):
        raise TypeError("Wrong format of vmem attribute, see docstring")
    assert isinstance(name, (str, list, tuple))
    assert isinstance(workdir, str)
    assert email is None or isinstance(email, str)
    if serial is None:
        # Get environment variable to toggle serial behaviour
        serial = _serial_from_environment()
    assert isinstance(serial, bool)
    
    # Shared setup
    if isinstance(paths, str):
        # A lone string would otherwise be joined character by character
        paths = [paths]
    paths = "PATH=$PATH:" + ":".join(paths) if paths else ""
    
    # Modify options to make sense in serial
    if serial:
        if paths:
            print("TODO: Not using paths for serial job.")
        if setup:
            print("TODO: Not using setup for serial job.")
        if workdir == "$PBS_O_WORKDIR":
            # There is no queue to define $PBS_O_WORKDIR, use current directory
            workdir = os.path.abspath(os.getcwd())
        if nice is not None:
            jobs = ["nice -n %d %s" % (nice, job) for job in jobs]
    else:
        if workdir != "$PBS_O_WORKDIR":
            workdir = os.path.abspath(workdir)

    # Define range of names if only a single name is provided
    if isinstance(name, str):
        names = ["%s_%d" % (name, k) for k in range(len(jobs))]
    else:
        names = list(name)
        if len(names) < len(jobs):
            # zip() below would silently skip the jobs without a name
            raise ValueError("Got only %d names for %d jobs." % (len(names), len(jobs)))

    # Enter workdir
    origdir = os.getcwd()
    try:
        if serial:
            os.chdir(workdir)
        # Queue sequence of jobs
        for job, jobname in zip(jobs, names):
            
            if serial:
                # Execute job in separate process
                cmd = job
                if dryrun:
                    print("NOT calling job command: %s" % cmd)
                else:
                    print("Calling job command: %s" % cmd)
                    # Pass process output to file f
                    f = open("%s_stdout" % jobname, "w")
                    try:
                        run_command(cmd, stdout=f)
                    finally:
                        f.close()
            else: 
                # Generate job script
                pbs = _pbs_header(jobname, nodes, ppn, walltime, mem, vmem,
                                  email, keep_environment)
                args = dict(pbs=pbs,
                            workdir=workdir,
                            paths=paths,
                            setup=setup,
                            job=job)
                script = jobscript % args
                
                # Write job script to file
                scriptfilename = "run_%s.sh" % jobname
                f = open(scriptfilename, "w")
                try:
                    f.write(script)
                finally:
                    f.close()
                print("Wrote file  %s" % scriptfilename)
                
                # Queue the generated job script
                cmd = ["qsub", scriptfilename]
                if dryrun:
                    # The script is left on disk so it can be inspected
                    print("NOT calling queue command: %s" % " ".join(cmd))
                else:
                    print("Calling queue command: %s" % " ".join(cmd))
                    subprocess.call(cmd)
                    os.unlink(scriptfilename)
    finally:
        # Always return to the directory we started in
        os.chdir(origdir)

