# @(#) $Id: JobSection.py,v 1.144 2003/09/22 16:59:23 tlevshin Exp $
#
# $Author: tlevshin $
#
# $Log: JobSection.py,v $
# Revision 1.144  2003/09/22 16:59:23  tlevshin
# fixed unixID setup for pending section during recovery process
# skip checking for time limit for section that is done (Exited or Canceled)
#
# Revision 1.143  2003/08/22 16:25:23  tlevshin
# fixed typo
#
# Revision 1.142  2003/08/22 16:18:50  tlevshin
# more fixes for Power
#
# Revision 1.141  2003/08/22 14:39:47  tlevshin
# implemented realTimeLimit check based on node cpu power
#
# Revision 1.140  2001/12/17 19:53:46  tlevshin
# do not try to kill process which  status
# is unknown
#
# Revision 1.139  2001/12/17 18:48:00  tlevshin
# added exit code 120, when launcher failed to kill the process
#
# Revision 1.138  2001/12/17 17:01:49  tlevshin
# more printouts, do not continue with recovery if
# job is done (failed and NEED=1)
#
# Revision 1.137  2001/12/15 22:48:23  tlevshin
# *** empty log message ***
#
# $Log: JobSection.py,v $
# Revision 1.144  2003/09/22 16:59:23  tlevshin
# fixed unixID setup for pending section during recovery process
# skip checking for time limit for section that is done (Exited or Canceled)
#
# Revision 1.143  2003/08/22 16:25:23  tlevshin
# fixed typo
#
# Revision 1.142  2003/08/22 16:18:50  tlevshin
# more fixes for Power
#
# Revision 1.141  2003/08/22 14:39:47  tlevshin
# implemented realTimeLimit check based on node cpu power
#
# Revision 1.140  2001/12/17 19:53:46  tlevshin
# do not try to kill process which  status
# is unknown
#
# Revision 1.139  2001/12/17 18:48:00  tlevshin
# added exit code 120, when launcher failed to kill the process
#
# Revision 1.138  2001/12/17 17:01:49  tlevshin
# more printouts, do not continue with recovery if
# job is done (failed and NEED=1)
#
# Revision 1.136  2001/12/15 22:16:49  tlevshin
# added more printouts
#
# Revision 1.135  2001/12/15 18:17:52  tlevshin
# fixed log entry
#
# Revision 1.134  2001/11/14 17:40:49  tlevshin
# fixed deallocation of force allocted resource, when processes  exited  and Need is 1
#
# Revision 1.133  2001/10/29 19:21:05  tlevshin
# implemented non-blocking start for processes
#
# Revision 1.132  2001/10/09 16:32:36  tlevshin
# fixed  time limit checking
#
# Revision 1.131  2001/09/20 16:21:28  tlevshin
# handle correctly section with NUMPROC=0
#
# Revision 1.130  2001/09/13 19:35:53  tlevshin
# fixed kill-when-node-is-down down
#
# Revision 1.129  2001/08/20 23:09:57  tlevshin
# changes in start to be used with new resource manager
#
# Revision 1.128  2001/07/30 14:23:52  tlevshin
# fix the bug in checkTimes
#
# Revision 1.127  2001/04/10 18:36:57  tlevshin
# fix updateSecState
#
# Revision 1.126  2001/04/03 13:20:36  tlevshin
# implemented updateSecState method in Job, call G_EventManager sectionStateChanged from state method of Section
#
# Revision 1.125  2001/03/27 20:53:35  tlevshin
# *** empty log message ***
#
# Revision 1.124  2001/03/18 05:14:06  tlevshin
# addede OnNodes Section Data memeber
#
# Revision 1.123  2001/02/06 18:25:24  ivm
# Fixed yet another memory mishadnling in farm_history, simplified syntax
# Added canceled() dependency type
# Added job ranges in kill.py
# Show processes for just ended sections
# Fixed decoding of status/signal/core in Section
#
# Revision 1.122  2001/02/05 17:47:07  tlevshin
# change status to "exited" when the start up failed
#
# Revision 1.121  2001/02/05 16:39:11  ivm
# Do not kill sections with names starting with '_'
#
# Revision 1.120  2001/02/02 16:02:09  tlevshin
# fixed handling node failure during startup
#
# Revision 1.119  2001/01/24 21:23:29  tlevshin
# fix syntax error
#
# Revision 1.118  2001/01/23 14:48:59  tlevshin
# handle network glitches
#
# Revision 1.117  2001/01/10 17:09:16  tlevshin
# *** empty log message ***
#
# Revision 1.116  2001/01/10 16:52:26  tlevshin
# *** empty log message ***
#
# Revision 1.115  2001/01/10 16:42:33  tlevshin
# 1.fixed the problem with killed issued, after the job has been finished
# 2.take out print statment when bmgr could not write to a file
#
# Revision 1.114  2000/11/10 23:03:59  tlevshin
# create Job with username
#
# Revision 1.113  2000/11/01 20:14:17  tlevshin
# added Placement and Username, changed canAllocate call in order
# to have different node allocation method
#
# Revision 1.112  2000/10/25 21:15:31  tlevshin
#  added Username to Job and Section
#
# Revision 1.111  2000/10/03 22:04:20  tlevshin
#  use fbs_mics.sendMail method
#
# Revision 1.110  2000/10/03 15:57:56  tlevshin
# fixed duplicate call to terminate when  the process failed and it
# is last process
#
# Revision 1.109  2000/09/26 15:59:24  tlevshin
# time in sec in log file
#
# Revision 1.108  2000/09/22 14:36:20  tlevshin
# make some changes (based on code review recomendation): use update dict method,
# change format string to tuple (..,)
#
# Revision 1.107  2000/09/20 23:14:49  tlevshin
# fixed StartTiem fro canceled processes
#
# Revision 1.106  2000/09/06 21:49:31  tlevshin
# saved sectRsrcDict
#
# Revision 1.105  2000/09/06 20:54:55  tlevshin
# fixed import RM, and sys.exc_value
#
# Revision 1.104  2000/09/06 16:36:15  tlevshin
# *** empty log message ***
#
# Revision 1.103  2000/09/06 16:22:56  tlevshin
#  restored hecking for the last processf
# restored checking for the last process in start
#
# Revision 1.102  2000/09/06 15:29:58  tlevshin
# *** empty log message ***
#
# Revision 1.101  2000/09/06 14:57:24  tlevshin
# fixed bugs#9149,#19056 - deallocate resource when failed to start the job
# and NEED=0,put process number in log file
#
# Revision 1.100  2000/08/21 15:35:36  tlevshin
# use G_HistoryDB from bmgr_global, instantiate this just once
#
# Revision 1.99  2000/08/17 16:43:11  tlevshin
# "zombie" process is stored in History
#
# Revision 1.98  2000/08/09 19:07:30  tlevshin
# implemented History class
#
# Revision 1.97  2000/08/03 21:07:16  tlevshin
# more fixes
#
# Revision 1.96  2000/08/03 20:02:13  tlevshin
# continue fixes
#
# Revision 1.95  2000/08/03 18:47:22  tlevshin
# fixes for resource pool
#
# Revision 1.94  2000/08/03 13:59:40  tlevshin
# *** empty log message ***
#
# Revision 1.93  2000/08/01 14:01:47  tlevshin
# implemented resource pool allocation
#
# Revision 1.92  2000/07/03 21:14:09  tlevshin
# *** empty log message ***
#
# Revision 1.91  2000/07/03 21:04:33  tlevshin
# *** empty log message ***
#
# Revision 1.90  2000/07/03 20:52:46  tlevshin
# changed mail format, add log to
# mail, write actual exit code to the log file
#
# Revision 1.89  2000/06/26 16:33:52  ivm
# Fixed process time printing in Section
# Fixed archive interval calculation in Section
# Fixed command retrieval from Section
#
# Revision 1.88  2000/06/23 19:31:52  tlevshin
# implemented archiving
#
# Revision 1.87  2000/06/22 15:49:05  tlevshin
# changed exitCode defenition policy
#
# Revision 1.86  2000/06/21 15:20:13  tlevshin
# *** empty log message ***
#
# Revision 1.85  2000/06/21 15:13:17  tlevshin
# *** empty log message ***
#
# Revision 1.84  2000/06/21 15:02:33  tlevshin
# *** empty log message ***
#
# Revision 1.83  2000/06/21 14:40:47  tlevshin
# *** empty log message ***
#
# Revision 1.82  2000/06/20 20:37:26  tlevshin
# *** empty log message ***
#
# Revision 1.81  2000/06/20 18:51:34  tlevshin
# *** empty log message ***
#
# Revision 1.80  2000/06/20 18:28:57  tlevshin
# *** empty log message ***
#
# Revision 1.79  2000/06/20 16:39:58  tlevshin
# new version of process recovery
#
# Revision 1.78  2000/06/19 22:45:46  tlevshin
# *** empty log message ***
#
# Revision 1.77  2000/06/19 19:25:56  tlevshin
# *** empty log message ***
#
# Revision 1.76  2000/06/19 18:48:47  tlevshin
# *** empty log message ***
#
# Revision 1.75  2000/06/19 18:48:00  tlevshin
# *** empty log message ***
#
# Revision 1.74  2000/06/19 18:27:35  tlevshin
# fixed bugs related to chnages for recovery
#
# Revision 1.73  2000/06/19 14:46:46  tlevshin
# changes in recovery process
#
# Revision 1.72  2000/06/08 21:45:52  tlevshin
# will not handle database corruption
#
# Revision 1.71  2000/06/08 19:39:11  tlevshin
# added PrioInc, fixed bug when restoring from corrupted database
#
# Revision 1.70  2000/06/05 15:50:36  tlevshin
# fixed restoring from corrupted database,
# writing to user provided output file
#
# Revision 1.69  2000/05/31 16:17:18  tlevshin
# bug fixed
#
# Revision 1.68  2000/05/31 15:34:04  tlevshin
# *** empty log message ***
#
# Revision 1.67  2000/05/31 15:04:20  tlevshin
# added "|" as a last character in history file
#
# Revision 1.66  2000/05/31 14:54:54  tlevshin
#  write to Section Output file in parallel with log,
# handle database corruption
#
# Revision 1.65  2000/05/22 15:10:34  tlevshin
# fixed bug in startRecovery
#
# Revision 1.64  2000/05/12 21:51:57  tlevshin
# *** empty log message ***
#
# Revision 1.63  2000/05/12 21:48:13  tlevshin
# *** empty log message ***
#
# Revision 1.62  2000/05/12 21:39:20  tlevshin
# *** empty log message ***
#
# Revision 1.61  2000/05/12 21:32:26  tlevshin
# *** empty log message ***
#
# Revision 1.60  2000/05/12 21:17:32  tlevshin
# change history, add JDF Info to log
#
# Revision 1.59  2000/05/10 20:16:21  tlevshin
# implemented SectStdout, fixed HoldTime
#
# Revision 1.58  2000/05/05 21:53:16  tlevshin
# do not update database when the last process is over;
# wait until all deallocation is done
#
# Revision 1.57  2000/05/05 20:14:27  tlevshin
# modified mail message format; check for existence and permission
# of history file
#
# Revision 1.56  2000/05/03 19:11:05  tlevshin
# add LeaderOnly option
#
# Revision 1.55  2000/04/28 17:04:43  tlevshin
# added submission based on JDF sequence
#
# Revision 1.54  2000/04/28 15:11:19  tlevshin
# added JDFSeq to history, and changed all empty resources and  dependency to
# None for farm_history
#
# Revision 1.53  2000/04/26 20:21:03  tlevshin
# set UID when startProcess (lachif) returned it
#
# Revision 1.52  2000/04/21 17:49:29  tlevshin
# changed "Running" status for user process to "running"
# add NProc to history
#
# Revision 1.51  2000/04/18 22:11:04  tlevshin
# added default for hist_dir,hist_file
#
# Revision 1.50  2000/04/17 22:23:27  tlevshin
# close the log every time
#
# Revision 1.49  2000/04/14 21:39:50  tlevshin
#  a
#
# Revision 1.48  2000/04/14 19:02:04  tlevshin
# added cpu,reason in procExit
# modified history again (back to flat file)
# fixed return value in case of multiple kills
#
# Revision 1.47  2000/04/06 19:43:38  tlevshin
# *** empty log message ***
#
# Revision 1.46  2000/04/06 19:23:03  tlevshin
# fixed holdTime /log bug
#
# Revision 1.45  2000/04/03 19:33:57  tlevshin
# *** empty log message ***
#
# Revision 1.44  2000/04/03 19:06:39  tlevshin
# change the name for UserProcess
# fixed Need (Need=0 (default) - do not kill the processes)
# unsibscribe when section is finished
#
# Revision 1.43  2000/03/31 15:14:12  ivm
# Ignore procExit() in Section for non-running processes
#
# Revision 1.42  2000/03/29 22:59:02  ivm
# Added Tracer code
#
# Revision 1.41  2000/03/28 20:02:20  ivm
# Modified History and implemented some history functionality in FBSClient
#
# Revision 1.40  2000/03/23 21:48:43  tlevshin
# added History class
#
# Revision 1.39  2000/03/17 16:06:29  tlevshin
# change dependency evaluation using DepExpression class
#
# Revision 1.38  2000/03/14 19:25:57  tlevshin
# fixed Hold (None - do not hold, -1 - hold forever)
#
# Revision 1.37  2000/03/14 18:44:10  tlevshin
# fix kill retVal, HoldTime, log output
#
# Revision 1.36  2000/03/07 19:46:54  tlevshin
# import sys
#
# Revision 1.35  2000/03/02 22:09:38  tlevshin
# added logging and mail
#
# Revision 1.34  2000/02/28 15:26:44  tlevshin
# Section.kill returns (retVal,reason)
# Job.hold(holdTime=None)
#
# Revision 1.33  2000/02/23 21:17:44  tlevshin
# *** empty log message ***
#
# Revision 1.32  2000/02/23 20:27:49  tlevshin
# *** empty log message ***
#
# Revision 1.31  2000/02/23 20:21:48  tlevshin
# 1.continue job submission when need=1 and some
# hosts failed to start job.
# 2.use "job retention interval" to delete job from database
# 3.QIndex
#
# Revision 1.30  2000/02/23 18:41:22  ivm
# Import bmgr_global
#
# Revision 1.29  2000/02/21 20:58:25  tlevshin
# fixed some bugs with section state
#
# Revision 1.28  2000/02/21 17:35:16  tlevshin
# *** empty log message ***
#
# Revision 1.27  2000/02/21 17:32:18  tlevshin
# *** empty log message ***
#
# Revision 1.26  2000/02/21 17:21:00  tlevshin
# 1.start process number with 1, integer key as a process number
# 2. add hostList to startProcess call for lchif
# 3. get rid of multiple retry for node allocation if process
# failed to start
# 4.send killProcess just once for the section
#
# Revision 1.25  2000/02/15 17:18:45  tlevshin
# *** empty log message ***
#
# Revision 1.24  2000/02/15 16:36:56  tlevshin
# added addSection call to startRecovery
# changed addSection to submitSection in submit method
#
# Revision 1.23  2000/02/14 22:24:04  tlevshin
# fixed NodeUP
#
# Revision 1.22  2000/02/14 19:22:12  tlevshin
# start end Recovery only for running Jobs
#
# Revision 1.21  2000/02/14 18:59:30  tlevshin
# *** empty log message ***
#
# Revision 1.20  2000/02/14 17:57:02  tlevshin
# *** empty log message ***
#
# Revision 1.19  2000/02/14 17:55:32  tlevshin
# *** empty log message ***
#
# Revision 1.18  2000/02/14 16:45:58  tlevshin
# *** empty log message ***
#
# Revision 1.17  2000/02/14 16:27:14  tlevshin
# added, clarify print statements
#
# Revision 1.16  2000/02/10 21:42:39  tlevshin
# added check for state in endRecovery, add prinJob,printSection methods etc
#
# Revision 1.15  2000/02/09 23:44:10  tlevshin
# *** empty log message ***
#
# Revision 1.14  2000/02/09 21:41:03  tlevshin
# *** empty log message ***
#
# Revision 1.13  2000/02/09 21:02:36  tlevshin
# multiple bug fixes
#
# Revision 1.12  2000/01/31 22:46:54  ivm
# Fixed some typos
#
# Revision 1.10  2000/01/31 21:39:22  ivm
# Fixed more trivial bugs
#
# Revision 1.9  2000/01/31 20:56:01  ivm
# Fixed minor bugs
#
# Revision 1.8  2000/01/28 21:41:49  tlevshin
# *** empty log message ***
#
# Revision 1.7  2000/01/27 20:25:26  tlevshin
# fix some syntax bugs
#
# Revision 1.6  2000/01/26 21:28:23  tlevshin
# add  some global references
#
# Revision 1.5  2000/01/26 21:12:26  tlevshin
# initial implementaion (continue)
#
# Revision 1.4  2000/01/24 21:45:47  tlevshin
# continue initail implementation
#
# Revision 1.3  2000/01/19 19:04:57  tlevshin
# initial implementaion (add several functions)
#
# Revision 1.2  2000/01/06 15:21:31  ivm
# empty
#
# Revision 1.1  1999/12/24 17:10:27  ivm
# Added files
#
#


			
#G_ResourceManager - resource Manager
#G_LauncherIF -Launcher Interface
#G_JobFinder - job finder
#G_QueueFinder- Queue Finder
#DBSection - database
#G_ServerCfg - configuration
#G_JobDB - JobDB


import sys
import os
import time
import string

import fbs_misc
from SectParam import SectParam
from DepParser import DepExpression
from History import History
import bmgr_global
import RM

#*************************************************
# Job - FBS job: collection of FBS sections      *
# data members:                                  *
#     ID - job ID name.jid                       *
#     Sections-section dictionary {id:section}   *
#     UID - user unix id                         *
#     GID - user group unix id                   *
#     Username - unix user name                  *
#     EndTime - job ending time (in sec)         *
#     RtnPeriod - time in sec to keep job info   *
#                 in memory                      *
# methods:                                       *
#     setUser - set user and group unix id       *
#     sections - returns list of job sections ids*
#     hasSection - checks if specified section   *
#                  exist                         *
#     state - returns job state [active,done]    *
#     hold - holds pending job                   *
#     release - releases held job                *
#     kill - initiates killing process of running*
#            sections and canceling pending ones *
#     checkTimes-checks if job exceeded timelimit*
#     removeFromQueue - initiates section removal*
#                       from queues              *
#     delAllSections - initiates section removal *
#                      from memory               *
#     updateState - sets the endTime for done job*
#     updateSecState -update the state of section*
#     endRecovery -initiates section end recovery*
#     submit - initiates sections submission     *
#*************************************************

class Job:
    """FBS job: a collection of sections that share one job id.

    The sections live in ``self.Sections`` ({section name: section}) and can
    also be reached with ``job[name]`` (get / set / del).  Most methods simply
    forward the request to every section of the job.
    """

    def __init__(self, jid, uid=0, guid=0, name='unknown'):
        """Create an empty job.

        jid  - job id
        uid  - unix user id of the owner
        guid - unix group id of the owner
        name - unix user name of the owner
        """
        self.ID = jid
        self.Sections = {}
        self.UID = uid
        self.GID = guid
        self.Username = name
        # Time (sec) the job was found to be done; 0 until that happens.
        self.EndTime = 0
        # How long (sec) a done job is kept before checkTimes() removes it;
        # taken from the 'job_retention_interval' setting, default 10 minutes.
        self.RtnPeriod = bmgr_global.G_ServerCfg.getValue(
            'bmgr', '*', 'job_retention_interval', 10*60)

    def __getitem__(self, sname):
        return self.Sections[sname]

    def __setitem__(self, sname, s):
        self.Sections[sname] = s

    def __delitem__(self, sname):
        del self.Sections[sname]

    def setUser(self, uid, gid):
        """Set the unix user id and group id of the job owner."""
        self.UID = uid
        self.GID = gid

    def sections(self):
        """Return the names of the sections of this job."""
        return self.Sections.keys()

    def hasSection(self, sname):
        """Return true if the job has a section called sname."""
        return self.Sections.has_key(sname)

    def state(self):
        """Return 'active' if any section is waiting, ready or running, else 'done'."""
        for s in self.Sections.values():
            if s.state() in ['waiting', 'ready', 'running']:
                return 'active'
        return 'done'

    def hold(self, holdTime=None):
        """Apply hold(holdTime) to every section."""
        for s in self.Sections.values():
            s.hold(holdTime)

    def release(self):
        """Release every section."""
        for s in self.Sections.values():
            s.release()

    def kill(self, flag=0, all=0):
        """Call kill(flag) on the sections of the job.

        Sections whose name starts with '_' are skipped unless all is true.
        """
        for s in self.Sections.values():
            if all or s.Name[0] != '_':
                s.kill(flag)

    def checkTimes(self):
        """Let every section check its times, then expire the job if it is due.

        A job that is 'done' is removed (delAllSections) once
        EndTime + RtnPeriod is not later than the current time.
        """
        for s in self.Sections.values():
            s.checkTimes()
        if self.state() == 'done':
            if (self.EndTime + self.RtnPeriod) <= time.time():
                self.delAllSections()

    def removeFromQueue(self):
        """Ask every section to remove itself from its queue."""
        for s in self.Sections.values():
            s.removeFromQueue()

    def history(self):
        """Call history() on every section."""
        for s in self.Sections.values():
            s.history()

    def delAllSections(self):
        """Destroy every section, drop the job from G_JobFinder and
        notify G_EventManager that the job was deleted."""
        for s in self.Sections.values():
            s.destroy()
        del bmgr_global.G_JobFinder[self.ID]
        bmgr_global.G_EventManager.jobDeleted(self.ID)

    def updateState(self):
        """Record the current time as EndTime if the job is done."""
        if self.state() == "done":
            self.EndTime = int(time.time())
        return

    def updateSecState(self):
        """Call state() on every section (the result is not used here)."""
        for s in self.Sections.values():
            s.state()

    def endRecovery(self):
        """Run endRecovery() on every section."""
        for s in self.Sections.values():
            s.endRecovery()

    def submit(self):
        """Submit the sections in ascending SectParam.JDFSeq order.

        The order is obtained by sorting (JDFSeq, name) pairs, so it is the
        same as before when the JDFSeq values are exactly 1..N, but gaps,
        duplicates or out-of-range values no longer raise IndexError/KeyError
        (equal JDFSeq values are ordered by section name).
        """
        ordered = []
        for name, s in self.Sections.items():
            ordered.append((s.SectParam.JDFSeq, name))
        ordered.sort()

        for seq, name in ordered:
            self.Sections[name].submit()

class DBSection:
    """Bare attribute holder for the database form of a section.

    Section.toDB() creates one and attaches SectParam, the _SecInfoField
    values and Procs to it; Section.fromDB() reads them back.
    """

    def __init__(self):
        """Create an empty record; all attributes are attached by the caller."""

#************************************************************************
#               Class UserProcess                                       *
#data members:                                                          *
#         ProcNo - process number                                       *
#         Node - node allocated for process to run                      *
#         UPID - unix process id                                        *
#         ExitCode - process exit code                                  *
#         Signal - flag that indicates if process finished with signal  *
#         Core - flag that indicates that core dump exists              *  
#         CPUTime - cpu time utilized by process                        *
#         Status - process status [running,finished]                    *
#         EndTime  - process ending time (in sec)                       *
#         StartTime - process starting time (in sec)                    *  
#         localRsrcDict - resource dictionary translation from resPool  *
#                         for local resources                           *
#         globalRsrcDict - resource dictionary translation from resPool *
#                         for global resources                          *
#methods:                                                               *
#         setStatus                                                     *
#         setUnixId                                                     *
#         setNode                                                       *
#         setStartTime                                                  *
#         setExitCode                                                   *
#         setEndTime                                                    *
#         setCpu                                                        *
#         setLocalRsrc                                                  *
#         setGlobalRsrc                                                 *
#************************************************************************
class UserProcess:
    """State of one user process belonging to a section.

    Keeps the process number, node, unix process id, decoded exit status,
    cpu time, status string, start/end times, the node power and the
    local/global resource dictionaries allocated for the process.
    """

    def __init__(self,pid=0,node="",unid=0,code=0,cpu=0,end=None,status="running"):
        """Initialise the process record.

        pid    - process number within the section (stored as ProcNo)
        node   - node name (stored as is; setNode() is not called here)
        unid   - unix process id
        code   - raw exit status, decoded by setExitCode()
        cpu    - cpu time
        end    - end time (sec) or None
        status - status string
        """
        self.ProcNo = pid
        self.Node = node
        self.UPID = unid
        self.EndTime = end
        self.setExitCode(code)
        self.CPUTime = cpu
        self.Status = status
        self.StartTime = None
        self.localRsrcDict = {}
        self.globalRsrcDict = {}
        self.Power = 0

    def setStartTime(self):
        """Stamp StartTime with the current time (whole seconds)."""
        now = int(time.time())
        self.StartTime = now

    def setStatus(self,status):
        """Store the process status string."""
        self.Status = status

    def setUnixId(self,unid):
        """Store the unix process id."""
        self.UPID = unid

    def setNode(self,node):
        """Store the node and look up its power in the resource manager.

        The power is kept so the elapsed execution time can be calculated later.
        """
        self.Node = node
        nodeClass = bmgr_global.G_ResourceManager.getClassOfNode(self.Node)
        # getNodeClass() returns four values; only the last one (power) is used
        unused1, unused2, unused3, nodePower = \
            bmgr_global.G_ResourceManager.getNodeClass(nodeClass)
        self.Power = nodePower

    def setExitCode(self,code,reason=""):
        """Decode a raw 16-bit exit status into ExitCode, Signal and Core.

        code   - raw status: high byte is the exit code, low byte is the
                 signal number with the core flag in its top bit; None
                 means unknown and yields ExitCode -1, Core 0, Signal 0
        reason - free text stored in Reason
        """
        self.Reason = reason
        if code is None:
            self.ExitCode = -1
            self.Core = 0
            self.Signal = 0
            return
        lowByte = code & 0x00ff
        self.ExitCode = code >> 8
        self.Core = lowByte >> 7
        self.Signal = lowByte & 0x7f
        # a zero exit code together with a signal or core dump is still a failure
        if not self.ExitCode and (self.Signal or self.Core):
            self.ExitCode = 1

    def setEndTime(self):
        """Stamp EndTime with the current time (whole seconds)."""
        now = int(time.time())
        self.EndTime = now

    def setCpu(self,cpu):
        """Store the cpu time used by the process."""
        self.CPUTime = cpu

    def setLocalRsrc(self,dict):
        """Merge dict into the local resource dictionary."""
        self.localRsrcDict.update(dict)

    def setGlobalRsrc(self,dict):
        """Merge dict into the global resource dictionary."""
        self.globalRsrcDict.update(dict)
#*************************************************************************
#                       Class Section                                    *
#Name   Type   Sec.Access Oth.Access  Storage      Description           *
#ID   String     Read          Read     -           jobId.sectName       *
#Name String     Read          Read     +          section name          *
#JID  String     Read          Read     +          jobId                 *
#Depend {}       Read          Read     +         {sectionName:dep_type} *
#                                                  where dep.type        *
#                                             ['started','done','exited',*
#                                              'ended','zombie']         *
#NProc Int       Read         Read      +          num of processes      *
#ProcType String Read         Read      +          process type          *
#UID   Int       Read         Read      +          unix user id          *
#GID   Int       Read         Read      +          unix group id         *
#Username String Read         Read      +          unix user name        *
#Placement String Read        Read      +       node selection method    *
#Queue String    Read         Read      +          queue                 *
#PerProcLocal {} Read         Read      +       local resources per proc *
#PerProcGlobal {}Read         Read      +       global resources per proc*
#PerSectGlobal {}Read         Read      +       global resources per sec *
#Exec  String    Read         Read      +       user exec+ arguments     *
#Stdout String   Read         Read      +       user output file         *
#Stderr String   Read         Read      +          user error file       *
#Need  Int       Read         Read      +          user kill option      *
#Nice  Int       Read         Read      +          user nice option      *
#SubTime Date    Write        Read      +          submission time       *
#HoldTime Date   Write        Read      +          hold time             *
#StartTime Date  Write        Read      +          start time            *
#EndTime Date    Write        Read      +          end time              *
#Prio  Int       Write        Read      +          section priority      *
#State String    Write        Read      +          section state         *
#                                                 'Pending','Cancelled', *
#                                                 'Running','Exited'     *
#RealTime Int    Read          -        -       real time execution limit*
#ExitCode Int    Write        Read      +           section exit code    *
#**************************************************************************
class	Section:
    _SecInfoField=['State','Prio','PGap','PrioInc','QIndex','SubTime','StartTime','EndTime','ExitCode','NodeList','sectRsrcDict']
    def __init__(self, sectId, params=None):
        """Create a section in 'Pending' state.

        sectId - section id; decoded by fbs_misc.decodeDotID into the job id
                 (JID) and the section name (Name)
        params - section parameters handed to setParam(); when it is empty
                 the section is expected to be filled in later by fromDB()
        """
        self.ID = sectId
        self.JID, self.Name = fbs_misc.decodeDotID(self.ID)

        # scheduling state
        self.State = "Pending"
        self.lastState = None
        self.Prio = 1
        self.PGap = 0
        self.PrioInc = None
        self.killSend = 0

        # ownership and placement defaults
        self.Username = 'unknown'
        self.Placement = 'round-robin'
        self.OnNodes = None

        # times and result
        self.SubTime = int(time.time())
        self.StartTime = None
        self.EndTime = None
        self.ExitCode = None

        # processes and allocated resources
        self.Procs = {}
        self.NodeList = []
        self.sectRsrcDict = {}

        self.SectOutput = None
        self.SectParam = None
        self.setParam(params)
#**************************************************************
#setParam: initiates some of the data members using SectParam *
# instantiates UserProcess objects                            *
#**************************************************************
    def setParam(self,param):
	# if param is None, section will be restored from database (fromDB)
	if not param:
	    return
	self.SectParam=param
	self.PerSectGlobal=param.PerSectGlobal
	self.PerProcLocal=param.PerProcLocal
	self.PerProcGlobal=param.PerProcGlobal
	self.ProcType=param.ProcType
	self.NProc=param.NProc
	self.UID=param.UID
	self.GID=param.GID
	self.Need=param.Need
	self.Depend=param.Depend
	self.DepExp=DepExpression(param.Depend)
	self.Queue=param.Queue
	self.PrioInc=param.PrioInc
	self.Username=param.Username
	self.Placement=param.Placement
	self.OnNodes=param.OnNodes
	for i in range(0,self.NProc):
	    Proc=UserProcess(i+1,"",0,0,0,None,"pending")
	    self.Procs[i+1]=Proc
	#try to create name for section output
	if param.SectOutput:
	    try:
		self.SectOutput=fbs_misc.expandPattern(self.UID, param.SectOutput, self.JID, self.Name, 'FBS_%j.log')
	    except:
		self.SectOutput=None
		self.log("Failed to expend section output name: %s, Reason: %s,%s" % (param.SectOutput,sys.exc_type, sys.exc_value))

#**************************************************************
# toDB: saves section related data in database                 *
#**************************************************************
    def toDB(self):
        """Return a DBSection holding the data of this section that is saved.

        The record gets SectParam, every attribute named in _SecInfoField and
        a shallow copy of the Procs dictionary.
        """
        record = DBSection()
        record.SectParam = self.SectParam
        own = self.__dict__
        for field in self._SecInfoField:
            record.__dict__[field] = own[field]
        # copy the dictionary itself so the record does not share it
        procs = {}
        for procNo, proc in self.Procs.items():
            procs[procNo] = proc
        record.Procs = procs
        return record
				
#**************************************************************
# fromDB: restores section related data from database         *
#**************************************************************
    def fromDB(self, dbs):
        """Restore this section from a DBSection record and attach it to its job.

        dbs - record produced by toDB(); its SectParam is re-applied through
              setParam(), then the _SecInfoField values and the processes are
              copied over, and finally createJob() registers the section.
        """
        param = dbs.SectParam
        self.setParam(param)
        for field in self._SecInfoField:
            self.__dict__[field] = dbs.__dict__[field]
        # replace the processes created by setParam() with the stored ones
        restored = {}
        for procNo in dbs.Procs.keys():
            restored[procNo] = dbs.Procs[procNo]
        self.Procs = restored
        self.createJob()
		
#**************************************************************
# getTotalProcInState: calculates the number of processes in  *
# specified state                                             *
#**************************************************************
    def getTotalProcInState(self,state):
	i=0
	for proc in self.Procs.values():
	    if proc.Status == state :
		i=i+1
	return i
#**************************************************************
# createJob: creates job if this is first section restored    *
#from database, else add itself to the job.Sections           *
#**************************************************************
    def createJob(self):
        """Attach this section to its job, creating the job if necessary.

        If G_JobFinder does not know the job id yet, a new Job is created
        with this section's UID, GID and Username, the section is added to
        it and the job is then registered in G_JobFinder.  Otherwise the
        section is added to the existing job.
        """
        known = bmgr_global.G_JobFinder.hasJob(self.JID)
        if known:
            job = bmgr_global.G_JobFinder[self.JID]
        else:
            job = Job(self.JID, self.UID, self.GID, self.Username)
        job[self.Name] = self
        if not known:
            # register a new job only after the section has been added to it
            bmgr_global.G_JobFinder[self.JID] = job

#*************************************************************
#startRecovery:     allocate global and local resources      *
#                   for all "pending" or "running" processes *
#                   for section restored from database       *
#*************************************************************

    def startRecovery(self):
        """Put a restored section back into its queue and re-claim its resources.

        The section is always added to its queue.  Nothing else is done
        unless the section State is 'Running'.  For a running section:
          - if any process has not exited, the per-section global resources
            are force-allocated in the resource manager;
          - for every process that has not exited, the section subscribes to
            launcher notifications for the process' node and the per-process
            global and local resources are force-allocated.

        Allocation failures are only logged: nothing more can be done if the
        configuration changed that badly, so the handlers stay deliberately
        broad.
        """
        queue=bmgr_global.G_QueueFinder[self.Queue]
        queue.addSection(self)

        if self.State!='Running':
            self.log("Start Recovery:section state %s: nothing to recover" % (self.State,))
            return

        numActive=self.NProc-self.getTotalProcInState("exited")
        self.log("Start Recovery:section state %s ,num of active process %s"
                 % (self.State,numActive))

        if numActive:
            try:
                bmgr_global.G_ResourceManager.forceAllocateGlobal(
                    self.PerSectGlobal,self.ProcType,self.sectRsrcDict)
                self.log("Force allocation section global resources:%s, process type %s, resource-pool %s" %
                         (self.PerSectGlobal,self.ProcType,self.sectRsrcDict))
            except RM.Invalid_ProcType:
                self.log("Force allocation failed for section global resources,process type does not exist :%s" % (self.ProcType,))
            except:
                # sys.exc_info() instead of the deprecated sys.exc_type/exc_value
                self.log("Force allocation failed for section global resource: %s,%s" % sys.exc_info()[:2])

        for ind,proc in self.Procs.items():
            if proc.Status=="exited":
                continue
            #subscribes to LIF to get notification about this process
            bmgr_global.G_LauncherIF.subNodeNotify(self.ID,proc.Node)

            # per-process global resources
            try:
                bmgr_global.G_ResourceManager.forceAllocateGlobal(
                    self.PerProcGlobal,self.ProcType,proc.globalRsrcDict)
                self.log("Force allocation global resources: %s,%s process type %s for process %s on node %s" %
                         (self.PerProcGlobal,proc.globalRsrcDict,self.ProcType,
                          ind,proc.Node))
            except RM.Invalid_ProcType:
                # was self.loge(...), which would raise AttributeError here
                self.log("Force allocation failed for process global resources,process type does not exist :%s" % (self.ProcType,))
            except:
                self.log("Force allocation failed for process global resource: %s,%s" % sys.exc_info()[:2])

            # per-process local resources on the process' node
            try:
                bmgr_global.G_ResourceManager.forceAllocateLocal(
                    proc.Node,self.PerProcLocal,self.ProcType,proc.localRsrcDict)
                self.log("Force allocation local resources: %s,%s process type %s for process %s on node %s" %
                         (self.PerProcLocal,proc.localRsrcDict,self.ProcType,
                          ind,proc.Node))
            except RM.Unknown_Host:
                self.log("Force allocation failed for node:%s" % (proc.Node,))
            except RM.Invalid_ProcType:
                self.log("Force allocation failed for process local resources,process type does not exist :%s" % (self.ProcType,))
            except:
                self.log("Force allocation failed for process local resource: %s,%s" % sys.exc_info()[:2])
#*************************************************************
#endRecovery:       starts all section process that are in   *
#                   Pending state                            *
#*************************************************************

    def endRecovery(self):
        """Finish recovery by launching processes that are still "pending".

        A section that is itself still "Pending" only logs the event.  For a
        "Running" section every process whose Status is "pending" is started
        through startProcess(); the loop stops as soon as the job reports
        "done".
        """
        owner = bmgr_global.G_JobFinder[self.JID]
        self.log("Start end recovery, job State is %s" % (owner.state(),))

        # NOTE(review): recovery is abandoned here only when the job end time
        # was still missing; a "done" job that already has an EndTime falls
        # through to the code below -- confirm that this is intended.
        if self.Need and owner.state() == "done" and not owner.EndTime:
            owner.EndTime = int(time.time())
            return

        if self.State == "Pending":
            self.log("End recovery")
            return
        if self.State != "Running":
            return

        # processes are numbered from 1
        for procNo in range(1, len(self.Procs) + 1):
            if self.Procs[procNo].Status != "pending":
                continue
            self.log("End recovery, will start pending process %s" % (procNo,))
            self.startProcess(procNo)
            if owner.state() == "done":
                self.log("Job is finished, no reason to continue with recovery")
                return
        return
	    

#**********************************************
#Method: hold                                 *
#set HoldTime to prevent section from starting*
#Arguments: holdTime None "do not hold"       *
#                    time "hold until time"   *
#                     -1  "hold forever"      *
#**********************************************
    def hold(self, holdTime=None):
        """Record holdTime in the section parameters and persist the section.

        holdTime: None - do not hold, -1 - hold forever, otherwise a time in
        seconds (formatted with time.ctime for the log message).
        """
        self.SectParam.HoldTime = holdTime
        if holdTime is None:
            until = "never"
        elif holdTime == -1:
            until = "forever"
        else:
            until = repr(time.ctime(holdTime))
        self.log("Hold the section until %s" % (until,))
        # make the new hold time persistent
        bmgr_global.G_JobDB.saveSection(self)


#***********************************************
#Method: release                               *
#set HoldTime to None to allow section to start*
#when it is ready                              *
#***********************************************
    def release(self):
        """Clear the hold time of the section and persist the section."""
        params = self.SectParam
        params.HoldTime = None
        self.log("Release the section")
        # make the cleared hold time persistent
        bmgr_global.G_JobDB.saveSection(self)
	

#************************************************
#Method: checkTimes                             *
#check real time limit and kill the processes if*
#it is exceeded                                 *
#************************************************
    def checkTimes(self):
        """Ask the launcher to kill processes that exceeded the real time limit.

        Nothing is checked for a section that has no start time, that is
        already "Exited" or "Canceled", or whose RealTimeLimit is -1.  The
        limit applied to a process is RealTimeLimit divided by the Power of
        the process; processes whose Power is still 0 are skipped.
        """
        if not self.StartTime or self.State in ["Exited", "Canceled"]:
            return
        timeNow = int(time.time())
        if self.SectParam.RealTimeLimit == -1:
            return

        for procNo, proc in self.Procs.items():
            if proc.Power == 0:
                # power is not known yet, the limit can not be scaled
                continue
            allowed = self.SectParam.RealTimeLimit / proc.Power
            if allowed < (timeNow - self.StartTime):
                self.log("Process #%d is exceeded time limit on node %s, sending signal to kill this processes" % (procNo, proc.Node))
                killed, why = bmgr_global.G_LauncherIF.killProcess(
                    proc.Node, self.ID, procNo, 0)
                if not killed:
                    self.log("Process %s on node %s: %s" % (procNo, proc.Node, why))

#************************************************
#Method: nodeUp                                 *
#restore the process running on the node        *
#Arguments: nname -      node name              *
#	  dict-[ProcNum1:UnixID,ProcNum2:UnixID]*
#         processes that are running on this    *
#         node                                  * 
#************************************************
    def nodeUp(self, nname, dict):
        """Reconcile the processes of this section with a node that came up.

        nname - node name
        dict  - maps process names reported for this node to their unix ids;
                the process number is taken from element [2] of
                fbs_misc.decodeDotID(name).

        For every process of the section placed on nname:
          * reported and "pending"  -> marked "running", unix id recorded
          * reported and "running"  -> only logged
          * reported, other status  -> the launcher is asked to kill it
          * not reported            -> procExit(ind, 1) if it was "running"
        """
        alive = {}
        for name, unixID in dict.items():
            alive[fbs_misc.decodeDotID(name)[2]] = unixID

        for ind, proc in self.Procs.items():
            if proc.Node != nname:
                continue

            if ind not in alive.keys():
                self.log("Node %s is up but process %s is not running" % (nname, ind))
                if proc.Status == "running":
                    self.procExit(ind, 1)
                continue

            if proc.Status == "pending":
                proc.setStatus("running")
                proc.setUnixId(alive[ind])
                # the original format string had a bare "%," here, which made
                # this log call raise ValueError after the state change
                self.log("Node %s is up and process %s  running with unixid %s, recoverd from pending state" %
                         (nname, ind, alive[ind]))
            elif proc.Status == "running":
                # case of short network glitch, section is still alive
                self.log("Node %s is now up and process %s is still running" %
                         (nname, ind))
            else:
                # case of long network glitch, when section is over
                self.log("Node %s is up and process %s is alive, but bmgr has to kill it" %
                         (nname, ind))
                rtVl, rsn = bmgr_global.G_LauncherIF.killProcess(
                    nname, self.ID, ind, 0)
                if not rtVl:
                    self.log("Process %s on node %s: %s" % (ind, nname, rsn))
#************************************************
#Method: nodeDown                                *
#set process parameters and status if the node   *
#went down                                       *
#Arguments: nname -  node name                   *
#************************************************
    def nodeDown(self, nname):
        """Report exit (code 1) for every process placed on node nname."""
        for procNo, proc in self.Procs.items():
            if proc.Node != nname:
                continue
            self.log("Node %s is down, process %s" % (nname, procNo))
            self.procExit(procNo, 1)
	
#************************************************
#Method: ifLastProc                              *
#check if the finished process was the last one  *
#then terminate section                          *
#*************************************************
    def ifLastProc(self):
        """Return 1 when no process is "running" or "zombie", otherwise 0."""
        # a zombie is a process we are still waiting for
        for busyState in ("running", "zombie"):
            if self.getTotalProcInState(busyState) != 0:
                return 0
        return 1

#************************************************
#Method: procExit                                *
#set process parameters and status               *
#when process finishes                           *
#Arguments: procNum -  process num               *
#           exit - process exit code             *
#           cpu - process cpu time utilization   *
#           reason - reason of failure           *
#*************************************************
    def procExit(self, procNo, exit=0, cpu=0, reason=""):
        """Record the end of process procNo and react to it.

        procNo - process number
        exit   - process exit code
        cpu    - process cpu time utilization
        reason - reason of failure

        A process that is already "exited" is only logged.  Otherwise its
        resources are deallocated and its status, exit code, end time and cpu
        time are stored.  If it was the last process the section is
        terminated; if not, the section is saved and the remaining processes
        are killed when the leader of a LeaderOnly section finished or when a
        process of a needed section failed.
        """
        proc = self.Procs[procNo]
        self.log("Recieved information about %s on %s, current  status %s" % (procNo, proc.Node, proc.Status))
        if proc.Status == 'exited':
            # the launcher may repeat the same information
            return

        self.deallocate(procNo)
        proc.setStatus("exited")
        proc.setExitCode(exit, reason)
        proc.setEndTime()
        proc.setCpu(cpu)
        self.log("Process %s exit with exit code %s cpu %s reason %s, deallocate resource on %s" % (procNo, proc.ExitCode, cpu, reason, proc.Node))
        bmgr_global.G_EventManager.procExited(self.ID, procNo)

        if self.ifLastProc():
            self.log("(procExit)Process %s is last, will terminate" % (procNo,))
            # terminate() saves the section itself
            self.terminate()
            return
        bmgr_global.G_JobDB.saveSection(self)

        leaderDone = self.SectParam.LeaderOnly and procNo == 1
        if leaderDone or (proc.ExitCode and self.Need):
            # either only the leader matters, or a needed process failed
            self.kill(0)
	

#*************************************************
#Method: kill                                    *
#send request to kill all the process            *
#*************************************************
    def kill(self, killFlag=0):
        """Request the end of every process of the section.

        killFlag is passed through to the launcher's killProcess().

        Returns a (retVal, reason) pair.  retVal is 1 unless the launcher
        failed to kill a running process; in that case retVal and reason are
        the values the launcher returned for the last failure (the original
        code dropped the reason and always returned "").  A repeated call
        returns (1, "job is already canceled") without doing anything.

        A "Pending" section is canceled and terminated.  Otherwise pending
        processes are marked "exited" and deallocated, running ones are killed
        through the launcher; a process that could not be killed is reported
        to procExit() with exit code 256*120.
        """
        if self.killSend:
            return 1, "job is already canceled"
        self.killSend = 1

        retVal = 1
        reason = ""
        if self.State in ["Exited", "Canceled"]:
            return retVal, reason

        if self.State == "Pending":
            self.log("section  is canceled")
            self.State = "Canceled"
            job = bmgr_global.G_JobFinder[self.JID]
            job.updateSecState()
            self.terminate()
            return retVal, reason

        for ind, proc in self.Procs.items():
            self.log("(kill)Process %s on node %s has status %s" %
                     (ind, proc.Node, proc.Status))
            if proc.Status == "pending":
                proc.setStatus("exited")
                self.deallocate(ind)
            elif proc.Status == "running":
                self.log("Process %s on node %s is being killed" %
                         (ind, proc.Node))
                rtVl, rsn = bmgr_global.G_LauncherIF.killProcess(
                    proc.Node, self.ID, ind, killFlag)
                if not rtVl:
                    retVal = rtVl
                    # hand the launcher's explanation back to the caller
                    reason = rsn
                    self.log("Failed to kill process %s on node %s" %
                             (ind, proc.Node))
                    self.procExit(ind, 256 * 120, 0, rsn)

        if self.ifLastProc():
            self.log("(kill)last process, will terminate")
            self.terminate()
        return retVal, reason

#*************************************************
#job: returns Job Object                         *
#*************************************************
    def job(self):
        """Return the entry of bmgr_global.G_JobFinder for this section's JID."""
        owner = bmgr_global.G_JobFinder[self.JID]
        return owner

#*************************************************
#deallocate: deallocate all resources allocated  *
#for specific process                            *
#*************************************************
    def deallocate(self, id):
        """Give back the local and global resources held by process id.

        Failures of the resource manager are logged and never propagated.
        """
        proc = self.Procs[id]
        nodeName = proc.Node
        localDict = proc.localRsrcDict

        # per-process local resources on the node of the process
        try:
            bmgr_global.G_ResourceManager.deallocateLocal(
                nodeName, self.PerProcLocal, self.ProcType, localDict)
            self.log("Deallocate local resources %s on node %s" %
                     (localDict, nodeName))
        except RM.Unknown_Host:
            self.log("Failed to deallocate node %s" % (nodeName,))
        except RM.Invalid_ProcType:
            self.log("Failed deallocate local resources with proc type %s" %
                     (self.ProcType,))
        except:
            self.log("Failed to deallocate local resources %s on node %s" %
                     (localDict, nodeName))

        # per-process global resources
        globalDict = proc.globalRsrcDict
        try:
            bmgr_global.G_ResourceManager.deallocateGlobal(
                self.PerProcGlobal, self.ProcType, globalDict)
            self.log("Deallocate global resources %s,%s" %
                     (self.PerProcGlobal, globalDict))
        except RM.Invalid_ProcType:
            self.log("Failed deallocate global resources with proc type %s" %
                     (self.ProcType,))
        except:
            self.log("Failed to deallocate global resources %s,%s" %
                     (self.PerProcGlobal, globalDict))
#*************************************************
#startProcess: starts one process at a time after*
#recovery                                        *
#*************************************************
    def startProcess(self, id):
        """Start the single process id through the launcher (used after recovery).

        Returns 1 when the process was started and 0 when the launcher failed
        to start it.  (The original fell off the end and returned None when
        the start failed and the section was not needed.)

        On failure the process is marked "exited" and its resources are
        deallocated; the section is terminated if this was the last process,
        and a needed section is saved and killed.
        """
        proc = self.Procs[id]
        node = proc.Node
        proc.setStatus("pending")
        bmgr_global.G_JobDB.saveSection(self)  # update database
        proc.setStartTime()
        job = bmgr_global.G_JobFinder[self.JID]
        retVal, reason = bmgr_global.G_LauncherIF.startProcess(
            node, self.ID, id, self.SectParam, self.NodeList,
            self.PerProcLocal, self.sectRsrcDict, proc.globalRsrcDict,
            proc.localRsrcDict)

        if retVal == 0:  # failed to start the process
            self.log("Failed to start process %s on node %s: reason %s" % (id, node, reason))
            proc.setStatus("exited")
            self.deallocate(id)
            if self.ifLastProc():
                self.log("(startProcess)Process %s is last, will terminate" % (id,))
                self.terminate()
            if self.Need:  # can not continue without this process
                bmgr_global.G_JobDB.saveSection(self)
                self.kill()
            # NOTE(review): no exit code is stored for the process on this
            # path (start() reports 120*256 through procExit) -- confirm.
            return 0

        # any other return value is the unix id of the started process
        self.log("Started process %s on node %s with unix id %s" %
                 (id, node, retVal))
        proc.setStatus("running")
        proc.setUnixId(retVal)
        self.State = "Running"
        job.updateSecState()
        bmgr_global.G_JobDB.saveSection(self)  # update database
        bmgr_global.G_LauncherIF.subNodeNotify(self.ID, node)
        bmgr_global.G_EventManager.procStarted(self.ID, id)
        return 1
#***************************************************************
#Method: start                                                 *
#Start section processes                                       *
#sectDict - dictionary mapping section global pools to         *
#underlying global resources. If nothing requested from        *
#pools for section, it will be {}.                             *
#                                                              *
#globalLst - list of global RP->UR mappings for processes.     *
#If nothing was requested from pools for processes,            *
#it will be [{},{},...].                                       * 
#                                                              *
#localLst - list of tuples (nodeName, local RP->UR dictionary) *
#for processes. If nothing was requested from pools, it will be*
#[(node1name, {}), (node2name, {}),...].                       *  
#Returns 0 on success; 1 if failed                             *
#***************************************************************

    def start(self, sectDict, globalLst, localLst):
        """Start all processes of the section.

        sectDict  - section global pool -> underlying resource mapping
        globalLst - one global mapping per process
        localLst  - one (nodeName, local mapping) tuple per process

        Works in three passes: record node and resources for every process,
        send a start request for every process, then wait for the launcher to
        confirm each start.  Returns 1 when a process of a needed section
        could not be started or when no process is left running afterwards
        (and the section has processes); returns 0 otherwise.
        """
        if not self.StartTime:
            self.StartTime = int(time.time())
        self.sectRsrcDict = sectDict
        self.log("Allocate global resources %s with procType %s for section" %
                 (self.PerSectGlobal, self.ProcType))

        # pass 1: remember node and resources of every process
        self.NodeList = []
        for procNo in range(1, len(self.Procs) + 1):
            placement = localLst[procNo - 1]
            proc = self.Procs[procNo]
            nodeName = placement[0]
            self.log("Allocated local resources %s with procType %s for  process %s on node %s"
                     % (self.PerProcLocal, self.ProcType, procNo, nodeName))
            self.NodeList.append(nodeName)
            proc.setNode(nodeName)
            proc.setLocalRsrc(placement[1])
            self.log("Allocate global resources %s with procType %s for process %s" %
                     (self.PerProcGlobal, self.ProcType, procNo))
            proc.setGlobalRsrc(globalLst[procNo - 1])
            if procNo == 1:
                self.State = "Running"
        bmgr_global.G_JobDB.saveSection(self)  # update database

        # pass 2: send the start requests
        for procNo in range(1, len(self.Procs) + 1):
            proc = self.Procs[procNo]
            nodeName = proc.Node
            proc.setStartTime()
            status, why = bmgr_global.G_LauncherIF.startProcess(
                nodeName, self.ID, procNo, self.SectParam, self.NodeList,
                self.PerProcLocal, self.sectRsrcDict, proc.globalRsrcDict,
                proc.localRsrcDict, 1)
            if status == -1:  # request could not be sent
                self.log("Failed to start process %s on node %s: reason %s"
                         % (procNo, nodeName, why))
                self.procExit(procNo, 120 * 256, 0, why)
                if self.Need:
                    return 1
                continue
            self.log("Send request to start process %s on node %s"
                     % (procNo, nodeName))
            proc.setStatus("running")
            bmgr_global.G_JobDB.saveSection(self)

        # pass 3: wait for confirmation from the launcher
        for procNo in range(1, len(self.Procs) + 1):
            proc = self.Procs[procNo]
            nodeName = proc.Node
            status, why = bmgr_global.G_LauncherIF.waitForProcessStart(
                nodeName, self.ID, procNo)
            if status == 0:
                self.log("Failed to start process %s on node %s: reason %s"
                         % (procNo, nodeName, why))
                self.procExit(procNo, 120 * 256, 0, why)
                if self.Need:
                    return 1
                continue
            # a non-zero value is the unix id of the process
            proc.setUnixId(status)
            self.log("Started process %s on node %s with unix id %s"
                     % (procNo, nodeName, status))
            bmgr_global.G_JobDB.saveSection(self)  # update database
            bmgr_global.G_LauncherIF.subNodeNotify(self.ID, nodeName)
            bmgr_global.G_EventManager.procStarted(self.ID, procNo)

        if not self.ifLastProc():
            return 0

        # nothing is running (or awaited) after the start attempt
        self.State = "Exited"
        bmgr_global.G_JobFinder[self.JID].updateSecState()
        if len(self.Procs):
            self.ExitCode = -1
            self.terminate()
            return 1
        self.ExitCode = 0
        self.terminate()
        return 0
#*************************************************
#terminate: terminates the section               *
#sets the exit code, deallocates global resources*
#saves history,sends mail to user                *
#*************************************************
    def terminate(self):
        """Close the section: exit code, resources, history, database, mail.

        Does nothing but log when the section already has an EndTime.
        A "Canceled" section gets exit code 1.  Any other section becomes
        "Exited"; if no exit code was set yet it is derived as follows:
          * no processes             -> 0
          * LeaderOnly               -> 0 if process 1 has no exit code
          * needed section (Need)    -> 1 if any process has an exit code,
                                        else 0
          * otherwise                -> 0 if no kill was sent and at least one
                                        process has no exit code, else 1
        Section global resources are then deallocated, launcher notifications
        are dropped and the job is told to update its section state.
        Finally the history is stored, the section saved, the job state
        updated and mail sent.
        """
        if self.EndTime:
            self.log("Section already terminated, section state %s" % (self.State,))
            return
        self.EndTime = int(time.time())

        if self.State == "Canceled":
            self.ExitCode = 1  # job never started
        else:
            self.State = "Exited"
            if not self.ExitCode:
                self.ExitCode = 1  # assume that section failed
                if not len(self.Procs):
                    self.ExitCode = 0
                elif self.SectParam.LeaderOnly:
                    # depends only on leader process
                    if not self.Procs[1].ExitCode:
                        self.ExitCode = 0
                elif self.Need:
                    # if at least one failed: section failed
                    self.ExitCode = 0
                    for ind, proc in self.Procs.items():
                        if proc.ExitCode:
                            self.ExitCode = 1
                            break
                    # This check used to sit inside the loop above, where it
                    # was only reached right after ExitCode had been set to 0
                    # and therefore never ran.
                    if self.ExitCode:
                        for ind, proc in self.Procs.items():
                            if proc.Status == "pending":
                                # deallocate in the case when the process
                                # ends before end recovery and we did force
                                # allocate
                                self.deallocate(ind)
                else:
                    if not self.killSend:
                        for ind, proc in self.Procs.items():
                            if not proc.ExitCode:
                                # at least one process finished successfully
                                self.ExitCode = 0
                                break
            try:
                bmgr_global.G_ResourceManager.deallocateGlobal(
                    self.PerSectGlobal, self.ProcType, self.sectRsrcDict)
                self.log("Deallocate global resources %s,%s procType %s" %
                         (self.PerSectGlobal, self.sectRsrcDict, self.ProcType))
            except RM.Invalid_ProcType:
                self.log("Failed deallocate global resources with proc type %s" %
                         (self.ProcType,))
            except:
                self.log("Failed to deallocate global resources %s,%s" %
                         (self.PerSectGlobal, self.sectRsrcDict))
            self.log("Unsubscribe from launcher notification")
            bmgr_global.G_LauncherIF.unsubNodeNotify(self.ID, "*")
            job = bmgr_global.G_JobFinder[self.JID]
            job.updateSecState()

        if not self.history():
            self.log("Store history file")
        bmgr_global.G_JobDB.saveSection(self)  # update database
        bmgr_global.G_JobFinder[self.JID].updateState()
        self.sendMail()
        self.log("End of Section")
#*************************************************
#sendMail: send mail to user if address is in jdf*
#*************************************************
    def sendMail(self):
        """Mail a section summary to SectParam.MailTo, if that is defined.

        The body holds the section data, per-process data (only when the
        section has a start time) and the section log file, whose timestamps
        are converted with logParse() except for the raw JDF dump lines.
        A failure to read the log file is logged and the mail is still sent.
        """
        if not self.SectParam.__dict__.has_key('MailTo'):
            return
        address = self.SectParam.MailTo
        cmd = bmgr_global.G_ServerCfg.getValue('global', '*', 'mail_command', '/bin/mail')

        # A section without start time never ran (job was canceled).  The
        # explicit test is needed because time.ctime(None) returns the current
        # time on newer interpreters instead of raising.
        start = None
        if self.StartTime:
            try:
                start = time.ctime(self.StartTime)
            except:
                start = None

        body = ["Section Info:\n"]
        body.append("Job %s Section: %s\n" % (self.JID, self.Name))
        body.append("Exec: %s\n" % self.SectParam.Exec)
        body.append("Submit_Time: %s \n" % time.ctime(self.SubTime))
        body.append("Start_Time:  %s\n" % start)
        body.append("End_time: %s\n" % time.ctime(self.EndTime))
        body.append("Exit Code:%s\n" % self.ExitCode)
        body.append("Number of Process %s\n" % self.NProc)
        body.append("-----------------------------\n")
        if start:
            body.append("Process Info:\n")
            for i in range(len(self.Procs)):
                body.append("-----------------------------\n")
                body.append("Process %s\n" % (i + 1))
                proc = self.Procs[i + 1]
                body.append("Node: %s\n" % proc.Node)
                if proc.StartTime:
                    body.append("Start Time: %s\n" % (time.ctime(int(proc.StartTime)),))
                if proc.EndTime:
                    body.append("End Time: %s\n" % (time.ctime(int(proc.EndTime)),))
                body.append("Exit Code:%s\n" % (proc.ExitCode,))
                if proc.Reason != "":
                    body.append("Reason:%s\n" % (proc.Reason,))
                body.append("CPU Time: %s\n" % (proc.CPUTime,))
        body.append("-----------------------------\n")

        # insert the log file into the mail body
        body.append("Log Info:\n")
        try:
            dirName = bmgr_global.G_ServerCfg.getValue('bmgr', '*', 'section_log_dir', '/tmp')
            logFile = open(dirName + "/" + self.ID + ".log", "r")
            try:
                logLines = logFile.readlines()
            finally:
                # the original never closed this handle
                logFile.close()
            body.append(self.logParse(logLines[0]))
            # lines up to and including the end-of-JDF marker are copied raw
            nextLine = 1
            for line in logLines[1:]:
                nextLine = nextLine + 1
                body.append(line)
                if line.find('END OF JDF INFO') >= 0:
                    break
            for line in logLines[nextLine:]:
                body.append(self.logParse(line))
        except:
            excType, excValue = sys.exc_info()[:2]
            self.log("Failed to add log file to mail:%s,%s" %
                     (excType, excValue))

        text = "".join(body)
        subject = "Section %s of Job %s Info" % (self.Name, self.JID)
        ret, reason = fbs_misc.sendMail(cmd, subject, address, text)
        if ret:
            self.log("Send mail to %s" % (address))
        else:
            self.log("Failed to Send mail to %s,reason %s" % (address, reason))
#*************************************************
#logParse:converts time in sec to ctime          *
#*************************************************
    def logParse(self, line):
        """Return line with its leading seconds field replaced by time.ctime.

        The field is everything before the first '|'.  A line whose first
        field is not an integer (for example a continuation line of a
        multi-line message) is returned unchanged instead of raising, so one
        such line no longer aborts the processing of a whole log file.
        """
        stamp = line.split('|')[0]
        try:
            seconds = int(stamp)
        except ValueError:
            return line
        return time.ctime(seconds) + line[len(stamp):]

#*************************************************
#history:store section info in history file      *
#*************************************************
    def history(self):
        """Pass the section to G_HistoryDB.store().

        Returns 1 (after logging the reason) when store() reports a true
        status value, 0 otherwise.
        """
        status, reason = bmgr_global.G_HistoryDB.store(self)
        if not status:
            return 0
        self.log("%s" % (reason,))
        return 1
#*************************************************
#state: returns section state (running, ready,   *
#zombie, waiting, etc.)                          *
#*************************************************
    
    def state(self):
        """Return the lower-case section state and remember it in lastState.

        "zombie" is final.  "Running" -> "running", "Canceled" -> "canceled",
        "Exited" -> "done" or "failed" depending on ExitCode.  A "Pending"
        section is "waiting", "zombie" or "ready" according to the result of
        its dependency expression (None, 0, anything else); a non-zombie
        section is forced to "waiting" while it is on hold, and an expired
        hold time is cleared.  A state change is reported to the event manager.
        """
        previous = self.lastState
        if previous == "zombie":
            return "zombie"

        current = self.State
        if current == "Running":
            self.lastState = "running"
        elif current == "Canceled":
            self.lastState = "canceled"
        elif current == "Exited":
            if self.ExitCode:
                self.lastState = "failed"
            else:
                self.lastState = "done"
        elif current == 'Pending':
            depResult = self.DepExp.evaluate(bmgr_global.G_JobFinder[self.JID])
            if depResult is None:
                pendingState = "waiting"
            elif depResult == 0:
                pendingState = "zombie"
            else:
                pendingState = "ready"
            if pendingState != "zombie":
                holdUntil = self.SectParam.HoldTime
                if holdUntil == -1:
                    # held forever
                    pendingState = "waiting"
                elif holdUntil:
                    if time.time() < holdUntil:
                        pendingState = "waiting"
                    else:
                        # hold expired
                        self.SectParam.HoldTime = None
            self.lastState = pendingState

        if self.lastState != previous:
            bmgr_global.G_EventManager.sectStateChanged(self.ID, self.lastState)
        return self.lastState
#*************************************************
#destroy: removes section from queue, deletes    *
#section from job.sections                       *
#*************************************************
   
    def destroy(self):
        """Delete the section from the job database and from its queue.

        A section whose state() is 'zombie' is first canceled and terminated.
        """
        if self.state() == 'zombie':
            self.log("section is canceled because dependency is not satisfied:%s" % (self.Depend,))
            self.State = 'Canceled'
            bmgr_global.G_JobFinder[self.JID].updateSecState()
            self.terminate()
        bmgr_global.G_JobDB.deleteSection(self.ID)
        bmgr_global.G_QueueFinder[self.Queue].remSection(self.ID)
#**************************************************
#submit: add section to queue dump jdf to log file*
#**************************************************
   
   
    def submit(self):
        """Submit the section to its queue, save it and log its JDF data."""
        bmgr_global.G_QueueFinder[self.Queue].submitSection(self)
        bmgr_global.G_JobDB.saveSection(self)
        self.dumpJDF()
#**************************************************
#dumpJdf: writes jdf info to log file             *
#**************************************************
    def dumpJDF(self):
        """Log every attribute of SectParam, five "name:  value" pairs per line."""
        pieces = ["Start of Section\n",
                  " ************JDF  INFO*********************\n"]
        onThisLine = 0
        for name, value in self.SectParam.__dict__.items():
            if onThisLine == 5:
                # five jdf variables per line
                pieces.append('\n')
                onThisLine = 0
            pieces.append(" %s:  %s" % (name, value))
            onThisLine = onThisLine + 1
        pieces.append('\n ************END OF JDF INFO****************')
        self.log("".join(pieces))
	
#****************************************************
#getProcess: returns UserProcess object for a given *
#process id                                         * 
#****************************************************
    def getProcess(self, pnum):
        """Return self.Procs[pnum], or None when that lookup fails."""
        try:
            found = self.Procs[pnum]
        except:
            found = None
        return found
#*************************************************
#log: logs important messages about status of the*
#section                                         * 
#*************************************************

    def log(self, msg):
        """Append "time|JID|Name|msg" to the section log and the user log.

        The section log is <section_log_dir>/<ID>.log (directory from the
        server configuration, default /tmp).  If SectOutput is set the same
        record is appended to that file as well; a failure there is noted in
        the section log.  Failures to write the section log are ignored.

        Both files are closed on every path; the original leaked the section
        log handle when its write failed and the user log handle when the
        user write failed.
        """
        sectLog = None
        try:
            dirName = bmgr_global.G_ServerCfg.getValue(
                'bmgr', '*', 'section_log_dir', '/tmp')
            sectLog = open(dirName + "/" + self.ID + ".log", "a")
            sectLog.write("%s|%s|%s|%s\n" % (int(time.time()), self.JID, self.Name, msg))
            sectLog.flush()
        except:
            # logging is best effort: drop the section log for this record
            if sectLog:
                try:
                    sectLog.close()
                except:
                    pass  # nothing sensible left to do with a broken handle
            sectLog = None

        try:
            if self.SectOutput:  # user wants to have own log file
                try:
                    userFd = open(self.SectOutput, 'a')
                    try:
                        userFd.write("%s|%s|%s|%s\n" % (int(time.time()), self.JID, self.Name, msg))
                        userFd.flush()
                    finally:
                        userFd.close()
                except:
                    if sectLog:
                        sectLog.write("%s|%s|%s|%s\n" % (int(time.time()),
                                      self.JID, self.Name, "Failed to write to the user log"))
                        sectLog.flush()
        finally:
            if sectLog:
                sectLog.close()
		
