#!/usr/bin/env python
"""
The flashproxy facilitator.
"""

import SocketServer
import getopt
import os
import socket
import sys
import threading
import time
from collections import defaultdict

from flashproxy import fac
from flashproxy import proc
from flashproxy.reg import Transport, Endpoint
from flashproxy.util import parse_addr_spec, format_addr, canonical_ip

# Address the facilitator's TCP server is bound to (see main()).
LISTEN_ADDRESS = "127.0.0.1"
# Default listening port; overridden by -p/--port.
DEFAULT_LISTEN_PORT = 9002
# Port assumed for relay-file entries whose address gives no port.
DEFAULT_RELAY_PORT = 9001
# Default log file name; overridden by -l/--log.
DEFAULT_LOG_FILENAME = "fp-facilitator.log"

# Tell proxies to poll for clients every POLL_INTERVAL seconds.
POLL_INTERVAL = 600

# Don't indulge clients for more than this many seconds.
CLIENT_TIMEOUT = 1.0
# Buffer no more than this many bytes when trying to read a line.
READLINE_MAX_LENGTH = 10240

# A client registration is forgotten once it has been handed out this many
# times (it is passed as maxserve to the client Endpoints collections).
MAX_PROXIES_PER_CLIENT = 5
# Outer transports accepted unless --outer-transports overrides them.
DEFAULT_OUTER_TRANSPORTS = ["websocket"]

# strftime() format of the timestamp that log() prefixes to every message.
LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

class UnknownTransport(Exception):
    """Raised when no registration can be found for a requested transport."""

class options(object):
    """Process-wide settings, used as a plain namespace (never instantiated).

    The values below are the defaults; main() overwrites them from the
    command line.
    """
    # TCP port the server listens on (-p/--port).
    listen_port = DEFAULT_LISTEN_PORT
    # Name of the log file (-l/--log); set to None by -d/--debug.
    log_filename = DEFAULT_LOG_FILENAME
    # Open file object that log() writes to.
    log_file = sys.stdout
    # Name of the relay file (-r/--relay-file); required.
    relay_filename = None
    # Whether main() forks into the background; cleared by -d/--debug.
    daemonize = True
    # File the daemon's PID is written to after forking (--pidfile).
    pid_filename = None
    # User whose privileges the process drops to (--privdrop-user).
    privdrop_username = None
    # When true, safe_str() scrubs values before they are logged.
    safe_logging = True
    # Outer transports accepted in registrations and relay-file entries.
    outer_transports = DEFAULT_OUTER_TRANSPORTS

def usage(f = sys.stdout):
    """Print the command-line help text to the file object f."""
    # Values interpolated into the help text below.
    substitutions = {
        "progname": sys.argv[0],
        "port": DEFAULT_LISTEN_PORT,
        "log": DEFAULT_LOG_FILENAME,
        "outer-transports": ",".join(DEFAULT_OUTER_TRANSPORTS)
    }
    template = """\
Usage: %(progname)s -r RELAY <OPTIONS>
Flash proxy facilitator: Register client addresses and serve them out
again. Listen on 127.0.0.1 and port PORT (by default %(port)d).

  -d, --debug               don't daemonize, log to stdout.
  -h, --help                show this help.
  -l, --log FILENAME        write log to FILENAME (default \"%(log)s\").
  -p, --port PORT           listen on PORT (default %(port)d).
      --pidfile FILENAME    write PID to FILENAME after daemonizing.
      --privdrop-user USER  switch UID and GID to those of USER.
  -r, --relay-file RELAY    learn relays from FILE.
      --outer-transports TRANSPORTS
                            comma-sep list of outer transports to accept proxies
                            for (by default %(outer-transports)s)
      --unsafe-logging      don't scrub IP addresses from logs.\
"""
    print >> f, template % substitutions

def safe_str(s):
    """Scrub s for logging.

    Gives back the placeholder "[scrubbed]" while options.safe_logging is
    true; otherwise s is returned unchanged.
    """
    return "[scrubbed]" if options.safe_logging else s

# Serializes writes to the log file across handler threads.
log_lock = threading.Lock()
def log(msg):
    """Write msg to options.log_file, prefixed with a timestamp.

    The line is encoded as UTF-8 and the file is flushed after each message.
    """
    with log_lock:
        stamp = time.strftime(LOG_DATE_FORMAT)
        encoded = (u"%s %s" % (stamp, msg)).encode("UTF-8")
        out = options.log_file
        out.write(encoded + "\n")
        out.flush()


class Endpoints(object):
    """
    Tracks endpoints (either client/server) and the transports they support.

    Each endpoint has one transport, an (inner, outer) pair, and a counter of
    how many times it has been served. Once that counter reaches maxserve the
    endpoint is forgotten.
    """

    # Pair-wise lock taken by match() in addition to the two per-collection
    # locks, so that only one match runs at a time.
    matchingLock = threading.Condition()

    def __init__(self, af, maxserve=float("inf")):
        """
        :param af: Address family of the endpoints held in this collection.
        :param maxserve: Number of times an endpoint may be served before it
            is removed; the default never removes endpoints.
        """
        self.af = af
        self._maxserve = maxserve
        self._endpoints = {} # address -> transport
        self._indexes = defaultdict(lambda: defaultdict(set)) # outer -> inner -> [ addresses ]
        self._served = {} # address -> num_times_served
        # Guards all of the above. threading.Condition() wraps a re-entrant
        # lock by default, which _serveReg relies on when it calls
        # delEndpoint with the lock already held.
        self._cv = threading.Condition()

    def getNumEndpoints(self):
        """:returns: the number of endpoints known to us."""
        with self._cv:
            return len(self._endpoints)

    def getNumUnservedEndpoints(self):
        """:returns: the number of unserved endpoints known to us."""
        with self._cv:
            return sum(1 for served in self._served.values() if served == 0)

    def addEndpoint(self, addr, transport):
        """Add an endpoint.

        :param addr: Address of endpoint, usage-dependent.
        :param transport: The endpoint's transport, in any form accepted by
            Transport.parse.
        :returns: False if the address is already known, in which case no
            update is made to its transport, else True.
        """
        transport = Transport.parse(transport)
        with self._cv:
            if addr in self._endpoints: return False
            inner, outer = transport
            self._endpoints[addr] = transport
            self._served[addr] = 0
            self._indexes[outer][inner].add(addr)
            self._cv.notify()
            return True

    def delEndpoint(self, addr):
        """Forget an endpoint.

        :param addr: Address of endpoint, usage-dependent.
        :returns: False if the address was already forgotten, else True.
        """
        with self._cv:
            if addr not in self._endpoints: return False
            inner, outer = self._endpoints[addr]
            bins = self._indexes[outer]
            bins[inner].remove(addr)
            # Drop bins that have become empty. Otherwise the index keeps one
            # empty set for every transport name ever registered, and it can
            # only grow.
            if not bins[inner]:
                del bins[inner]
            if not bins:
                del self._indexes[outer]
            del self._served[addr]
            del self._endpoints[addr]
            self._cv.notify()
            return True

    def _findInnerForOuter(self, *supported_outer):
        """Find all endpoint addresses that support any of the given outer
        transports. Results are grouped by the inner transport.

        The caller is expected to hold self._cv.

        :returns: { inner: [addr] }, where each address supports some outer
            transport from supported_outer.
        """
        inners = defaultdict(set)
        # Intersect first so that looking up an unknown outer transport does
        # not create an entry in the defaultdict.
        for outer in set(supported_outer) & set(self._indexes):
            for inner, addrs in self._indexes[outer].items():
                if addrs:
                    # don't add empty bins, to avoid false-positive key checks
                    inners[inner].update(addrs)
        return inners

    def _serveReg(self, addrpool):
        """
        The caller is expected to hold self._cv.

        :param list addrpool: List of candidate addresses.
        :returns: An Endpoint whose address is from the given pool; the
            address served the fewest times so far is chosen. The serve
            counter for that address is also incremented, and if it hits
            self._maxserve the endpoint is removed from this collection.
        :raises: ValueError if the pool is empty; KeyError if any address is
            not registered with this collection
        """
        if not addrpool: raise ValueError("gave empty address pool")
        prio_addr = min(addrpool, key=lambda a: self._served[a])
        assert self._served[prio_addr] < self._maxserve
        self._served[prio_addr] += 1
        # Read the transport before the endpoint is possibly deleted below.
        transport = self._endpoints[prio_addr]
        if self._served[prio_addr] == self._maxserve:
            self.delEndpoint(prio_addr)
        return Endpoint(prio_addr, transport)

    EMPTY_MATCH = (None, None)
    @staticmethod
    def match(ptsClient, ptsServer, supported_outer):
        """
        :param ptsClient: Endpoints collection holding client registrations.
        :param ptsServer: Endpoints collection holding servers; must have an
            unlimited maxserve.
        :param supported_outer: Iterable of acceptable outer transports.
        :returns: A tuple (client Reg, server Reg) arbitrarily selected from
            the available endpoints that can satisfy supported_outer, or
            Endpoints.EMPTY_MATCH if there is no such pair.
        :raises: ValueError if the two collections have different address
            families, or if ptsServer has a finite maxserve.
        """
        if ptsClient.af != ptsServer.af:
            raise ValueError("address family not equal")
        if ptsServer._maxserve < float("inf"):
            raise ValueError("servers mustn't run out")
        # need to operate on both structures
        # so hold both locks plus a pair-wise lock
        with Endpoints.matchingLock, ptsClient._cv, ptsServer._cv:
            server_inner = ptsServer._findInnerForOuter(*supported_outer)
            client_inner = ptsClient._findInnerForOuter(*supported_outer)
            both = set(server_inner) & set(client_inner)
            if not both: return Endpoints.EMPTY_MATCH
            # find a client to serve
            client_pool = [addr for inner in both for addr in client_inner[inner]]
            assert len(client_pool)
            client_reg = ptsClient._serveReg(client_pool)
            # find a server to serve that has the same inner transport
            inner = client_reg.transport.inner
            assert inner in server_inner and len(server_inner[inner])
            server_reg = ptsServer._serveReg(server_inner[inner])
            # assume servers never run out
            return (client_reg, server_reg)


class Handler(SocketServer.StreamRequestHandler):
    """Serve one facilitator connection.

    The peer sends newline-terminated transactions (GET or PUT), each of
    which is answered with one line. The whole connection has a time budget
    of CLIENT_TIMEOUT seconds, counted from construction of the handler.
    """

    def __init__(self, *args, **kwargs):
        # This state must exist before the base-class constructor runs,
        # because that constructor is what invokes handle().
        self.deadline = time.time() + CLIENT_TIMEOUT
        # Buffer for readline.
        self.buffer = ""
        SocketServer.StreamRequestHandler.__init__(self, *args, **kwargs)

    def recv(self):
        """Receive up to 1024 bytes, waiting no longer than the deadline.

        :returns: the data received; empty at end of stream.
        :raises socket.timeout: if the connection's deadline has passed.
        """
        timeout = self.deadline - time.time()
        if timeout <= 0:
            # settimeout() raises ValueError for a negative value and treats
            # 0 as "non-blocking". Report an expired deadline as an ordinary
            # timeout instead, which handle() catches as a socket error.
            raise socket.timeout("timed out")
        self.connection.settimeout(timeout)
        return self.connection.recv(1024)

    def readline(self):
        """Read one line, including its trailing newline, from the peer.

        :returns: the line, or "" at a clean end of stream.
        :raises socket.error: if the stream ends in the middle of a line, or
            if READLINE_MAX_LENGTH bytes or more are buffered without a
            newline being seen.
        """
        # A line already buffered?
        i = self.buffer.find("\n")
        if i >= 0:
            line = self.buffer[:i+1]
            self.buffer = self.buffer[i+1:]
            return line

        # Chunks received so far that contain no newline.
        auxbuf = []
        buflen = len(self.buffer)
        while True:
            data = self.recv()
            if not data:
                if self.buffer or auxbuf:
                    raise socket.error("readline: stream does not end with a newline")
                else:
                    return ""
            i = data.find("\n")
            if i >= 0:
                line = self.buffer + "".join(auxbuf) + data[:i+1]
                # Keep whatever followed the newline for the next call.
                self.buffer = data[i+1:]
                return line
            else:
                auxbuf.append(data)
                buflen += len(data)
                if buflen >= READLINE_MAX_LENGTH:
                    raise socket.error("readline: refusing to buffer %d bytes (last read was %d bytes)" % (buflen, len(data)))

    @proc.catch_epipe
    def handle(self):
        """Process transactions until end of stream, a socket error, or a
        transaction handler asking to stop."""
        num_lines = 0
        while True:
            try:
                line = self.readline()
                if not line:
                    break
                num_lines += 1
            except socket.error as e:
                log("socket error after reading %d lines: %s" % (num_lines, str(e)))
                break
            if not self.handle_line(line):
                break

    def handle_line(self, line):
        """Parse and dispatch one transaction line.

        :param line: A line as returned by readline, ending in a newline.
        :returns: True to keep reading from this connection, False to stop.
        :raises ValueError: if line does not end with a newline.
        """
        if not (len(line) > 0 and line[-1] == '\n'):
            raise ValueError("No newline at end of string returned by readline")
        try:
            command, params = fac.parse_transaction(line[:-1])
        except ValueError as e:
            return self.error("fac.parse_transaction: %s" % e)

        if command == "GET":
            return self.do_GET(params)
        elif command == "PUT":
            return self.do_PUT(params)
        else:
            self.send_error()
            return False

    def send_ok(self):
        """Send the "OK" reply line."""
        print >> self.wfile, "OK"

    def send_error(self):
        """Send the "ERROR" reply line."""
        print >> self.wfile, "ERROR"

    def error(self, log_msg):
        """Log log_msg, reply "ERROR", and return False so that the caller
        stops processing this connection."""
        log(log_msg)
        self.send_error()
        return False

    # Handle a GET request (got flashproxy poll; need to return a proper client registration)
    # Example: GET FROM="3.3.3.3:3333" PROXY-TRANSPORT="websocket" PROXY-TRANSPORT="webrtc"
    def do_GET(self, params):
        """Answer a proxy poll with a client/relay pair, or with NONE.

        :returns: True if a reply other than ERROR was sent, else False.
        """
        proxy_spec = fac.param_first("FROM", params)
        if proxy_spec is None:
            return self.error(u"GET missing FROM param")
        try:
            proxy_addr = canonical_ip(*parse_addr_spec(proxy_spec, defport=0))
        except ValueError as e:
            return self.error(u"syntax error in proxy address %s: %s" % (safe_str(repr(proxy_spec)), safe_str(repr(str(e)))))

        transport_list = fac.param_getlist("PROXY-TRANSPORT", params)
        if not transport_list:
            return self.error(u"GET missing PROXY-TRANSPORT param")

        try:
            client_reg, relay_reg = get_match_for_proxy(proxy_addr, transport_list)
        except Exception as e:
            return self.error(u"error getting reg for proxy address %s: %s" % (safe_str(repr(proxy_spec)), safe_str(repr(str(e)))))

        check_back_in = get_check_back_in_for_proxy(proxy_addr)

        if client_reg:
            log(u"proxy (%s) gets client '%s' (supported transports: %s) (num relays: %s) (remaining regs: %d/%d)" %
                (safe_str(repr(proxy_spec)), safe_str(repr(client_reg.addr)), transport_list, num_relays(), num_unhandled_regs(), num_regs()))
            print >> self.wfile, fac.render_transaction("OK",
                ("CLIENT", format_addr(client_reg.addr)),
                ("CLIENT-TRANSPORT", client_reg.transport.outer),
                ("RELAY", format_addr(relay_reg.addr)),
                ("RELAY-TRANSPORT", relay_reg.transport.outer),
                ("CHECK-BACK-IN", str(check_back_in)))
        else:
            log(u"proxy (%s) gets none" % safe_str(repr(proxy_spec)))
            print >> self.wfile, fac.render_transaction("NONE", ("CHECK-BACK-IN", str(check_back_in)))

        return True

    # Handle a PUT request (client made a registration request; register it.)
    # Example: PUT CLIENT="1.1.1.1:5555" TRANSPORT="obfs3|websocket"
    def do_PUT(self, params):
        """Register the client described by a PUT transaction.

        :returns: True if "OK" was sent, False if the request was rejected.
        """
        # Check out if we recognize the transport in this registration request
        transport_spec = fac.param_first("TRANSPORT", params)
        if transport_spec is None:
            return self.error(u"PUT missing TRANSPORT param")

        # The spec comes straight from the peer, so answer a parse failure
        # with ERROR rather than letting it escape the handler.
        # NOTE(review): assumes Transport.parse signals bad input with
        # ValueError -- confirm against flashproxy.reg.
        try:
            transport = Transport.parse(transport_spec)
        except ValueError as e:
            return self.error(u"syntax error in transport %s: %s" % (safe_str(repr(transport_spec)), safe_str(repr(str(e)))))
        # Only accept registrations whose outer transport is one we are
        # configured to serve.
        if transport.outer not in options.outer_transports:
            return self.error(u"Unrecognized transport: %s" % transport.outer)

        client_spec = fac.param_first("CLIENT", params)
        if client_spec is None:
            return self.error(u"PUT missing CLIENT param")

        try:
            reg = Endpoint.parse(client_spec, transport)
        except (UnknownTransport, ValueError) as e:
            # XXX should we throw a better error message to the client? Is it possible?
            return self.error(u"syntax error in %s: %s" % (safe_str(repr(client_spec)), safe_str(repr(str(e)))))

        try:
            ok = put_reg(reg)
        except Exception as e:
            return self.error(u"error putting reg %s: %s" % (safe_str(repr(client_spec)), safe_str(repr(str(e)))))

        # A duplicate registration is still acknowledged with OK; only the
        # log message differs.
        if ok:
            log(u"client %s (transports: %s) (remaining regs: %d/%d)" % (safe_str(unicode(reg)), reg.transport, num_unhandled_regs(), num_regs()))
        else:
            log(u"client %s (already present) (transports: %s) (remaining regs: %d/%d)" % (safe_str(unicode(reg)), reg.transport, num_unhandled_regs(), num_regs()))

        self.send_ok()
        return True

    # Wrap the base-class finish() with the same decorator as handle().
    finish = proc.catch_epipe(SocketServer.StreamRequestHandler.finish)

class Server(SocketServer.ThreadingMixIn, SocketServer.TCPServer):
    """TCP server that handles each connection in its own thread."""
    # Ask SocketServer to set SO_REUSEADDR on the listening socket.
    allow_reuse_address = True

# Addresses are plain tuples (str(host), int(port))

# Client registrations, one collection per address family. Each registration
# is handed out at most MAX_PROXIES_PER_CLIENT times.
CLIENTS = dict(
    (family, Endpoints(af=family, maxserve=MAX_PROXIES_PER_CLIENT))
    for family in (socket.AF_INET, socket.AF_INET6)
)

# Relays, one collection per address family. These are never used up.
RELAYS = dict(
    (family, Endpoints(af=family))
    for family in (socket.AF_INET, socket.AF_INET6)
)

def num_relays():
    """Count the relays known across all address families."""
    total = 0
    for relays in RELAYS.values():
        total += relays.getNumEndpoints()
    return total

def num_regs():
    """Count the client registrations held across all address families."""
    total = 0
    for clients in CLIENTS.values():
        total += clients.getNumEndpoints()
    return total

def num_unhandled_regs():
    """Count the client registrations that have not been served yet."""
    total = 0
    for clients in CLIENTS.values():
        total += clients.getNumUnservedEndpoints()
    return total

def addr_af(addr_str):
    """Determine the address family of a numeric address.

    addr_str is a plain host string (not a (host, port) tuple), and an IPv6
    address is given without brackets. AI_NUMERICHOST keeps getaddrinfo from
    doing any name resolution.
    """
    results = socket.getaddrinfo(
        addr_str, 0, 0,
        socket.SOCK_STREAM, socket.IPPROTO_TCP, socket.AI_NUMERICHOST)
    # Each result is (family, socktype, proto, canonname, sockaddr).
    family = results[0][0]
    return family

def get_match_for_proxy(proxy_addr, transport_list):
    """Pick a (client, relay) pair for a polling proxy.

    The pair is taken from the collections for the proxy's own address
    family. A ValueError from the matching is re-raised as UnknownTransport.
    """
    host = proxy_addr[0]
    family = addr_af(host)
    clients, relays = CLIENTS[family], RELAYS[family]
    try:
        match = Endpoints.match(clients, relays, transport_list)
    except ValueError as e:
        raise UnknownTransport("Could not find registration for transport list: %s: %s" % (transport_list, e))
    return match

def get_check_back_in_for_proxy(proxy_addr):
    """Return the CHECK-BACK-IN interval, in seconds, to give this proxy.

    Every proxy currently gets POLL_INTERVAL; proxy_addr is not consulted.
    """
    interval = POLL_INTERVAL
    return interval

def put_reg(reg):
    """Store a client registration.

    The registration goes into the collection for its address family. The
    result is that of Endpoints.addEndpoint: False when the address is
    already registered, True otherwise.
    """
    host = reg.addr[0]
    clients = CLIENTS[addr_af(host)]
    return clients.addEndpoint(reg.addr, reg.transport)

def parse_relay_file(servers, fp):
    """Parse a file containing Tor relays that we can point proxies to.
    Throws ValueError on a parsing error. Each line contains a transport chain
    and an address, for example
        obfs2|websocket 1.4.6.1:4123
    Blank lines and lines whose first non-blank character is '#' are ignored.

    :param servers: Mapping of address family to the Endpoints collection
        that relays of that family are added to.
    :param fp: Iterable of lines, such as an open file.
    :returns: number of relays added
    """
    n = 0
    for raw_line in fp:
        # Strip all surrounding whitespace before deciding whether to skip
        # the line, so that whitespace-only lines, CRLF line endings and
        # indented comments are not mistaken for malformed entries.
        line = raw_line.strip()
        if not line or line.startswith('#'): continue
        fields = line.split()
        if len(fields) != 2:
            raise ValueError("Wrong line format: %s." % repr(raw_line.strip("\n")))
        transport_spec, addr_spec = fields
        addr = parse_addr_spec(addr_spec, defport=DEFAULT_RELAY_PORT)
        transport = Transport.parse(transport_spec)
        if transport.outer not in options.outer_transports:
            raise ValueError(u"Unrecognized transport: %s" % transport)
        af = addr_af(addr[0])
        servers[af].addEndpoint(addr, transport)
        n += 1
    return n

def main():
    """Run the facilitator.

    Parses the command line into options, loads the relay file, opens the
    log, binds the listening socket, optionally daemonizes and drops
    privileges, and then serves requests until interrupted.
    """
    opts, args = getopt.gnu_getopt(sys.argv[1:], "dhl:p:r:", [
        "debug",
        "help",
        "log=",
        # Must be listed here for the --outer-transports branch below to be
        # reachable; without it getopt rejects the documented option.
        "outer-transports=",
        "port=",
        "pidfile=",
        "privdrop-user=",
        "relay-file=",
        "unsafe-logging",
    ])
    for o, a in opts:
        if o == "-d" or o == "--debug":
            options.daemonize = False
            options.log_filename = None
        elif o == "-h" or o == "--help":
            usage()
            sys.exit()
        elif o == "-l" or o == "--log":
            options.log_filename = a
        elif o == "-p" or o == "--port":
            options.listen_port = int(a)
        elif o == "--pidfile":
            options.pid_filename = a
        elif o == "--privdrop-user":
            options.privdrop_username = a
        elif o == "-r" or o == "--relay-file":
            options.relay_filename = a
        elif o == "--outer-transports":
            options.outer_transports = a.split(",")
        elif o == "--unsafe-logging":
            options.safe_logging = False

    if not options.relay_filename:
        print >> sys.stderr, """\
The -r option is required. Give it the name of a file
containing relay transports and addresses.
  -r HOST[:PORT]
Example file contents:
obfs2|websocket 1.4.6.1:4123\
"""
        sys.exit(1)

    # The relay file is read after all options are parsed, so that it is
    # checked against the final list of outer transports.
    try:
        with open(options.relay_filename) as fp:
            n = parse_relay_file(RELAYS, fp)
            if not n:
                raise ValueError("file contained no relays")
    except ValueError as e:
        print >> sys.stderr, u"Could not parse file %s: %s" % (repr(options.relay_filename), str(e))
        sys.exit(1)

    # Setup log file
    if options.log_filename:
        options.log_file = open(options.log_filename, "a")
        # Send error tracebacks to the log.
        sys.stderr = options.log_file
    else:
        options.log_file = sys.stdout

    addrinfo = socket.getaddrinfo(LISTEN_ADDRESS, options.listen_port, 0, socket.SOCK_STREAM, socket.IPPROTO_TCP)[0]

    # addrinfo[4] is the socket address to bind to.
    server = Server(addrinfo[4], Handler)

    log(u"start on %s" % format_addr(addrinfo[4]))
    log(u"using IPv4 relays %s" % str(RELAYS[socket.AF_INET]._endpoints))
    log(u"using IPv6 relays %s" % str(RELAYS[socket.AF_INET6]._endpoints))

    if options.daemonize:
        log(u"daemonizing")
        pid = os.fork()
        if pid != 0:
            # Parent: record the child's PID, then exit and leave the child
            # to do the serving.
            if options.pid_filename:
                with open(options.pid_filename, "w") as f:
                    print >> f, pid
            sys.exit(0)

    if options.privdrop_username is not None:
        log(u"dropping privileges to those of user %s" % options.privdrop_username)
        try:
            proc.drop_privs(options.privdrop_username)
        except BaseException as e:
            print >> sys.stderr, "Can't drop privileges:", str(e)
            sys.exit(1)

    try:
        server.serve_forever()
    except KeyboardInterrupt:
        sys.exit(0)

# Start the facilitator only when this file is run as a script.
if __name__ == "__main__":
    main()
