#!/usr/bin/python

"""
    omximdb - mini IMDB search tool for omxremote.
    Used imdbpy previously, but it's far too heavyweight
    (and slow!) for what we need.
"""

import contextlib
import re
import urllib
import urllib2

# --- Find (search results) page ---------------------------------------
# Search URL; the %s placeholder receives the query text.
URL_FIND = "http://www.imdb.com/find?ref_=nv_sr_fn&q=%s&s=all"
# Captures the digits of a title id from links like href="/title/tt<digits>/
RE_TT = r"href=\"\/title\/tt(\d+?)\/"

# --- Title page --------------------------------------------------------
# Title page URL; the %s placeholder receives the digits captured by RE_TT.
URL_TITLE = "http://www.imdb.com/title/tt%s/?ref_=fn_al_tt_1"
# Each of these captures the content="..." value that follows the
# named og: property (note og:description is double quoted, the
# other two are single quoted).
RE_TITLE = r"property=\'og:title' content=\"(.+?)\""
RE_PLOT = r"property=\"og:description\" content=\"(.+?)\""
RE_COVER = r"property=\'og:image\' content=\"(.+?)\""
# Captures the text between "rated this " and the following "/1"
RE_RATING = r"rated this (.+?)\/1"
# Captures the inner text of a <time ...>...</time> element
RE_DURATION = r"<time.+?>(.+?)</time>"

# --- Post-processing ---------------------------------------------------
# Four consecutive digits, used to pick the year out of the title text
RE_YEAR = r"\d\d\d\d"
# Tail that replaces the cover URL from its "V1" marker onwards
# (presumably selects an 80x80 thumbnail - confirm against IMDB)
COVER_SIZE = "V1_SY80_CR91,0,80,80_AL.jpg"

def _res(r):
    if len(r) > 0: 
        s = r[0].strip()
        if s.find("<a") != -1:
            s = s[0:s.find("<a")].strip()
        if type(s) == unicode:
            return s.encode("ascii", "xmlcharrefreplace")
        return s
    return ""

def _fetch(url):
    """
    Downloads "url" and returns the response body as a byte
    string. The connection is closed before returning, even
    if reading the body fails.
    """
    with contextlib.closing(urllib2.urlopen(url)) as response:
        return response.read()

def _split_title(raw):
    """
    Splits a page title such as "Edge of Tomorrow (2014)" into
    a (title, year) tuple of strings.

    The year is the first four digit number found inside a
    parenthesised group, and the title is the text in front of
    that group. Only looking inside parentheses keeps digits
    in the name itself ("2001: A Space Odyssey (1968)",
    "(500) Days of Summer (2009)") and digits of &#NNNN;
    character references from being taken for the year.

    If no parenthesised group holds a four digit number, the
    year is the first four digit number anywhere in the text
    ("" if none) and the title is cut at the first "(".
    """
    m = re.search(r"\([^()]*?(" + RE_YEAR + r")[^()]*\)", raw)
    if m is not None:
        return raw[:m.start()].strip(), m.group(1)

    year = _res(re.findall(RE_YEAR, raw))
    paren = raw.find("(")
    if paren != -1:
        raw = raw[:paren].strip()
    return raw, year

def search(q):
    """
    Searches IMDB for the movie "q" and returns a dictionary
    of values for the first title found, with the keys
    movieid, title, year, rating, duration, plotoutline
    and coverurl. All values are strings; a value that could
    not be found on the page is "".

    Returns None if the search gave no titles. Errors raised
    by urllib2 while fetching the pages are not caught.
    """
    # Submit the query to the find page. quote_plus turns spaces
    # into "+" and also escapes characters such as "&" or "#"
    # that would otherwise corrupt the query string. It needs
    # bytes, so unicode queries are sent as UTF-8.
    if isinstance(q, unicode):
        q = q.encode("utf-8")
    page = _fetch(URL_FIND % urllib.quote_plus(q))

    # Extract the first title in the result
    tt = re.findall(RE_TT, page)
    if not tt:
        return None

    # If we have one, open the title page. Undecodable bytes are
    # replaced rather than aborting the whole lookup.
    page = _fetch(URL_TITLE % tt[0]).replace("\n", " ")
    page = unicode(page, "utf-8", "replace")

    # Extract all the bits
    title, year = _split_title(_res(re.findall(RE_TITLE, page)))

    plotoutline = _res(re.findall(RE_PLOT, page))

    # Swap the size suffix of the cover for our thumbnail size.
    # Cut at the "_V1_" marker itself (keeping its leading
    # underscore); a bare "V1" may occur earlier in the image id.
    coverurl = _res(re.findall(RE_COVER, page))
    marker = coverurl.find("_V1_")
    if marker != -1:
        coverurl = coverurl[:marker + 1] + COVER_SIZE

    # Whole number ratings are shown with one decimal place
    rating = _res(re.findall(RE_RATING, page))
    if rating and "." not in rating:
        rating += ".0"

    duration = _res(re.findall(RE_DURATION, page))
    if duration != "":
        plotoutline += " (%s, %s)" % (duration, year)

    rv = {
        "movieid":      _res(tt),
        "rating":       rating,
        "plotoutline":  plotoutline,
        "coverurl":     coverurl,
        "title":        title,
        "duration":     duration,
        "year":         year
    }
    return rv

if __name__ == "__main__":
    # Quick manual check when run as a script: look up a
    # known film and dump whatever search() returned
    result = search("edge of tomorrow")
    print(result)

