#!/usr/bin/env python

# -------------------------------
# Checker for web server. If failure detected, restarts glassfish and sends report
# 
# Copyright (c) Sergey Klyaus, Tune-IT
#               2012
# -------------------------------

import subprocess
import socket
import sys
import os
import re
from signal import SIGKILL
from time import ctime, sleep

import smtplib
import subprocess

from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

# When true, do_trace() writes debugging messages to stderr.
_TRACE = False

# Address the checker connects to (HOST:PORT), the value sent in the HTTP
# 'Host' header (WEBHOST) and the path that is requested (RESOURCE).
HOST = 'localhost'
WEBHOST = 'google.com'
RESOURCE = '/'
PORT = 80

# Socket timeout in seconds: if the server has not responded within this
# time, the check reports a failure.
TIMEOUT = 15.0

# Log file location and the size (in bytes) above which log_open() rotates
# the log to LOG_PATH + '.old'.
LOG_PATH = '/var/log/webchecker.log'
MAX_LOG_SIZE = 2 * 1024 * 1024

# SMTP server used to deliver the failure report.
MAILHOST = 'localhost'
MAILPORT = 25

# Sender address and list of recipient addresses for the failure report.
MAILFROM = ''
MAILTO = ['myaut@tune-it.ru']

# File whose contents are attached to the report e-mail.
# NOTE(review): presumably written by '/etc/init.d/glassfish jvm-report' --
# confirm against the init script.
REPORTFILE = '/data/glassfish3/jvmreport.txt'

# Text for the body of the failure notification e-mail.
MSGBODY='''Instance failure was detected. Restarting.

See attached files for diagnostic information.'''

# Module-level handles: the open log file (set by log_open()) and the socket
# used for the check (set by check()). Both are closed at the end of the run.
logf = None
sock = None

def do_trace(message):
    '''Print a debugging message to stderr when tracing is enabled.

    Does nothing unless the module-level _TRACE flag is true.'''
    if not _TRACE:
        return

    print >> sys.stderr, message

def log_open():
    '''Open the checker log and write a separator line for this run.

    If a log already exists and is larger than MAX_LOG_SIZE it is renamed to
    LOG_PATH + '.old' (replacing any previous '.old' file) and a fresh log is
    started; an existing log within the limit is appended to. The opened file
    object is stored in the module-level logf.'''
    global logf

    if not os.path.exists(LOG_PATH):
        # Nothing to rotate or append to: start a new log.
        open_mode = 'w'
    elif os.stat(LOG_PATH).st_size > MAX_LOG_SIZE:
        # Rotate: keep exactly one previous generation.
        rotated_path = LOG_PATH + '.old'

        if os.path.exists(rotated_path):
            os.remove(rotated_path)

        os.rename(LOG_PATH, rotated_path)
        open_mode = 'w'
    else:
        open_mode = 'a'

    logf = open(LOG_PATH, open_mode)

    separator = '-=' * 25 + '-'
    print >> logf, separator

def log(message):
    '''Write a message to the open log file, prefixed with the current time.'''
    stamped = '[%s] %s' % (ctime(), message)
    print >> logf, stamped
        
def log_close():
    '''Close the log file held in the module-level logf.'''
    logf.close()

def gen_message():
    '''Build the failure notification e-mail and return it as a string.

    The message is a MIME multipart addressed from MAILFROM to MAILTO. It
    carries MSGBODY as its text and, when REPORTFILE can be read, the
    contents of that file as an attachment named 'jvmreport.txt'. If the
    report cannot be read, a short note saying why is included instead so
    that the notification is still produced.

    Returns the serialized message, ready to be passed to sendmail().'''
    msg = MIMEMultipart()

    msg['Subject'] = 'Your site webchecker notification'
    msg['From'] = MAILFROM
    msg['To'] = ', '.join(MAILTO)

    # Use the configurable body text rather than a hard-coded copy of it.
    msg.attach(MIMEText(MSGBODY))

    # A missing or unreadable file raises IOError on Python 2 (OSError on
    # Python 3); EnvironmentError is the common base class of both.
    try:
        with open(REPORTFILE, 'r') as repf:
            report = MIMEText(repf.read())
    except EnvironmentError as e:
        msg.attach(MIMEText('JVM report %s could not be read: %s'
                            % (REPORTFILE, e)))
    else:
        report.add_header('Content-Disposition', 'attachment',
                          filename='jvmreport.txt')
        msg.attach(report)

    return msg.as_string()

def send_report():
    '''Request a JVM report from glassfish and e-mail it to MAILTO.

    Runs '/etc/init.d/glassfish jvm-report' (a non-zero exit status is
    logged), builds the message with gen_message() and sends it through the
    SMTP server at MAILHOST:MAILPORT.

    Errors raised by smtplib or the underlying socket propagate to the
    caller; the SMTP connection is closed in every case.'''
    rc = subprocess.call(['/etc/init.d/glassfish', 'jvm-report'])
    if rc != 0:
        log('jvm-report exited with status %d' % rc)

    # Build the message before connecting, so that a failure while building
    # it cannot leave an SMTP session open.
    mail_body = gen_message()

    mail_client = smtplib.SMTP(MAILHOST, MAILPORT)
    try:
        mail_client.sendmail(MAILFROM, MAILTO, mail_body)
        mail_client.quit()
    finally:
        # Releases the socket if sendmail()/quit() raised; harmless after a
        # successful quit(), which has already closed the connection.
        mail_client.close()

    log('Mail is sent to ' + ', '.join(MAILTO))
    
def get_instance_pid(jps='/usr/java/jdk1.6.0_31/bin/jps', instance='instance1'):
    '''Gather instances from jps output and return the instance pid.

    jps      -- path of the jps executable to run (invoked with '-mlvV')
    instance -- instance name looked for after '-instancename' in the
                command line reported by jps

    The match is a substring test, so a name that is a prefix of another
    instance's name (e.g. 'instance1' and 'instance10') matches both; the
    first matching line wins.

    Returns -1 if a running instance is not detected. Raises OSError if jps
    cannot be executed.'''
    marker = '-instancename %s' % instance

    proc = subprocess.Popen([jps, '-mlvV'], stdout=subprocess.PIPE,
                            universal_newlines=True)
    # communicate() drains the pipe while waiting for the child. Calling
    # wait() first can deadlock once the output fills the pipe buffer.
    output = proc.communicate()[0]

    for line in output.splitlines():
        fields = line.split(None, 1)

        # Skip blank lines and lines that carry a pid but no command line.
        if len(fields) != 2:
            continue

        pid, cmd = fields

        if marker in cmd and pid.isdigit():
            return int(pid)

    return -1
    
def check(host, port, resource='/', webhost=None):
    '''Check connection with server by issuing a single HTTP GET.

    host, port -- address to connect to
    resource   -- path to request
    webhost    -- value for the HTTP 'Host' header; defaults to host

    The socket is stored in the module-level sock and is left open; the
    caller is responsible for closing it.

    Return value:
    0 - Web server is working (status 200 or 301)
    1 - Failure detected (sending the request or receiving the reply
        failed or timed out)
    2 - Internal error (i.e. connection refused, empty or unparsable reply,
        or any other HTTP status)'''
    global sock

    if webhost is None:
        webhost = host

    # Open socket & connect
    sock = socket.socket()
    sock.settimeout(TIMEOUT)

    log('Attempting to connect to %s:%d' % (host, port))

    try:
        sock.connect((host, port))
    except socket.error as e:
        log('Connection failed  %s:%d' % (host, port))
        log('Socket error: %s' % e)

        return 2

    do_trace('Connected to %s:%d' % (host, port))

    # Send HTTP request and receive response
    request_string = 'GET %s HTTP/1.1\r\nHost: %s\r\n\r\n' % (resource, webhost)
    do_trace('Request: %s' % repr(request_string))

    try:
        # sendall() keeps writing until the whole request is out; send() may
        # transmit only part of it.
        sock.sendall(request_string)
        rsp = sock.recv(100)     # 100 bytes is enough
    except socket.timeout:
        # Must come before socket.error: socket.timeout is a subclass of it
        # and would otherwise never reach this handler.
        log('Socket timed out')

        return 1
    except socket.error as e:
        log('Send-receive stage failed  %s:%d' % (host, port))
        log('Socket error: %s' % e)

        return 1

    lines = rsp.splitlines()

    if not lines:
        return 2

    do_trace('First line of HTTP responce: %s' % lines[0])

    # Parse and analyze response. The reason phrase is optional, so a status
    # line such as 'HTTP/1.1 200' is accepted as well.
    parts = re.split(r'\s+', lines[0], 2)

    if len(parts) < 2:
        do_trace('Failed to parse incoming responce')
        return 2

    version, status = parts[0], parts[1]
    message = parts[2] if len(parts) > 2 else ''
    do_trace('Version: %s Status: %s Message: %s' % (version, status, message))

    if status in ['200', '301']:
        log('Web server returned successful status %s' % status)
        return 0

    log('Web server failure: status %s' % status)

    return 2

def restart_glassfish(pid):
    '''Kill the running instance and start the glassfish cluster again.

    pid -- process id of the instance to kill

    Sends SIGKILL to pid, waits 3 seconds, then runs
    '/etc/init.d/glassfish start-cluster' and logs its exit status. A pid
    that no longer exists is logged and does not prevent the start; any
    other OSError from the kill propagates.'''
    import errno

    log('Killing instance with pid=%d' % pid)

    try:
        os.kill(pid, SIGKILL)
    except OSError as e:
        # The process may have exited on its own since it was detected; the
        # cluster still has to be brought back up in that case.
        if e.errno != errno.ESRCH:
            raise
        log('Instance with pid=%d is already gone' % pid)

    sleep(3.0)

    start_cmd = ['/etc/init.d/glassfish', 'start-cluster']
    rc = subprocess.call(start_cmd)
    log('Started cluster using %s; returned: %d' % (repr(' '.join(start_cmd)), rc))
    
# Script body: locate the instance, probe the web server and restart
# glassfish when the probe reports a failure. The socket and the log file are
# closed on every exit path.
try:
    log_open()

    pid = get_instance_pid()

    if pid == -1:
        log('Instance not running')
        sys.exit(1)

    log('Detected glassfish cluster instance pid: %d' % pid)

    rc = check(HOST, PORT, RESOURCE, WEBHOST)

    if rc == 0:
        log('Check successful!')
    elif rc == 1:
        log('Failure detected, restarting service...')

        # The report is best-effort: a mail or report problem must not keep
        # the failed service from being restarted.
        try:
            send_report()
        except Exception as e:
            log('Failed to send report: %s' % e)

        restart_glassfish(pid)
    elif rc == 2:
        log('Internal error; ignoring')
finally:
    if sock:
        sock.close()

    if logf:
        log_close()