Attachment 'C__Gmail_gmail.py'

Download

   1 """
   2 gmail.py -- Python interface to Gmail (http://www.gmail.com/)
   3 
   4 Known to work with Python 2.3+.
   5 
   6 Sample usage:
   7 
   8 >>> c = GmailClient()
   9 >>> c.login('username', 'password')
  10 >>> c.get_inbox_conversations()
  11 ['free viagra', 'welcome to css-discuss', 'hey, how are you?']
  12 >>> c.get_inbox_conversations(is_unread=True)
  13 ['free viagra', 'hey, how are you?']
  14 >>> c.get_inbox_conversations(subject='css')
  15 ['welcome to css-discuss']
  16 >>> c.get_inbox_conversations()[2]
  17 'hey, how are you?'
  18 >>> c.get_inbox_conversations()[2].get_messages()
  19 [<email.Message.Message instance at 0xf6bbad4c>, <email.Message.Message instance at 0xf6bbad8a>]
  20 >>> print c.get_inbox_conversations()[2].get_messages()[0]
  21 # outputs raw e-mail source
  22 >>> c.get_contacts()
  23 [['jlennon@gmail.com', 'John Lennon'], ['billy@hotmail.com', 'Billy Shears']]
  24 >>> c.add_contact('George', 'george@yahoo.com')
  25 >>> c.delete_contact('jlennon@gmail.com')
  26 """
  27 
  28 # Copyright (C) 2004, Adrian Holovaty
  29 #
  30 # This program is free software; you can redistribute it and/or modify it under
  31 # the terms of the GNU General Public License as published by the Free Software
  32 # Foundation; either version 2 of the License, or (at your option) any later
  33 # version.
  34 #
  35 # This program is distributed in the hope that it will be useful, but WITHOUT
  36 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  37 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  38 # details.
  39 #
  40 # You should have received a copy of the GNU General Public License along with
  41 # this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  42 # Place, Suite 330, Boston, MA 02111-1307 USA
  43 
  44 # Changelog:
  45 #
  46 # 0.1 (2004-06-18)
  47 #     Initial version. Support for login() and get_inbox_messages().
  48 # 0.2 (2004-06-20)
  49 #     Added get_contacts(), add_contact() and delete_contact().
  50 #     Changed get_inbox_messages() to get_inbox_conversations().
  51 #     Made get_inbox_conversations() subject parameter case-insensitive.
  52 # 0.3 (2004-06-24) -- Patch from Gustavo Sverzut Barbieri (Thanks, Gustavo!)
  53 #     GmailClient.login() now raises LoginFailure on failure.
  54 #     Added socket.setdefaulttimeout(30).
  55 #     Added UTF-8 support (GmailClient._encode()).
  56 #
  57 # To do:
  58 # * Optionally mark messages as read when they're retrieved.
  59 # * Clean HTML in Conversation subjects.
  60 # * Add GmailClient.get_conversations_by_label() method.
  61 
  62 
  63 __version__ = "0.3"
  64 __date__ = "2004-06-24"
  65 __author__ = "Adrian Holovaty (holovaty@gmail.com)"
  66 
  67 from Cookie import SimpleCookie
  68 import email, random, re, socket, time, urllib, urllib2
  69 
  70 EMAILS_RE = re.compile('\nD\((\["t",.*?\])\n\);', re.DOTALL)
  71 MESSAGE_INFO_RE = re.compile('\nD\((\["mi",.*?\])\n\);', re.DOTALL)
  72 # Gmail says this when it complains
  73 FAILURE_MESSAGE = 'Your action was not successful'
  74 LOGIN_FAILURE_MESSAGE = 'Username and password do not match.'
  75 
  76 class BadGmailTransaction(Exception):
  77     "Base exception raised when Gmail transactions don't work"
  78     pass
  79 
  80 class ContactCouldNotBeAdded(BadGmailTransaction):
  81     pass
  82 
  83 class ContactCouldNotBeDeleted(BadGmailTransaction):
  84     pass
  85 
  86 class LoginFailure(BadGmailTransaction):
  87     pass
  88 
# Give up on any network operation that stalls for more than 30 seconds.
# NOTE: this runs at import time and sets the default for every socket
# created afterwards in the whole process, not only those used by this module.
socket.setdefaulttimeout(30)
  90 
  91 class GmailClient:
  92     def __init__(self):
  93         self._cookies = SimpleCookie()
  94         self._folder_cache, self._message_cache = {}, {}
  95         self._contacts = []
  96 
  97     def login(self, username, password):
  98         """
  99         Logs into Gmail with the given username and password.
 100         Raises LoginFailure if the login fails.
 101         """
 102         epoch_secs = int(time.time())
 103         self._cookies["GMAIL_LOGIN"] = "T%s/%s/%s" % (epoch_secs-2, epoch_secs-1, epoch_secs)
 104         p = self._get_page("https://www.google.com/accounts/ServiceLoginBoxAuth",
 105             post_data="continue=https://gmail.google.com/gmail&service=mail&Email=%s&Passwd=%s&submit=null" % (username, password))
 106         c = p.read()
 107         p.close()        
 108         #r = re.search('var cookieVal\s*=\s*"([^"]+)"', c)
 109         r = re.search('top.location \s*=\s*"([^"]+)"', c)               
 110         if not r or c.find(LOGIN_FAILURE_MESSAGE) > -1:
 111             raise LoginFailure, "Wrong username or password."
 112         self._cookies['GV'] = r.groups()[0]
 113         #p = self._get_page("https://www.google.com/accounts/CheckCookie?continue=http%3A%2F%2Fgmail.google.com%2Fgmail&service=mail&chtml=LoginDoneHtml")
 114         p = self._get_page("https://www.google.com/accounts/" + str(r.groups()[0]))
 115         p.close()
 116         p = self._get_page("http://www.google.com/")
 117         p.close()
 118         p = self._get_page("http://gmail.google.com/gmail?view=page&name=js")
 119         c = p.read()
 120         p.close()
 121         r = re.search("var js_version\s*=\s*'([^']+)'", c)
 122         if not r:
 123             raise LoginFailure, "Gmail might have redesigned."
 124         self._js_version = r.groups()[0]
 125        
 126     def get_inbox_conversations(self, is_unread=None, is_starred=None, label=None, subject=None):
 127         """
 128         Returns a list of all the messages in the inbox matching the given
 129         search parameters, as GmailMessageStub objects.
 130 
 131         Parameters:
 132             is_unread:  Boolean (or just 1 or 0). Limits the results to read
 133                         vs. unread conversations. A conversation is read if
 134                         *every one* of its messages has been read.
 135             is_starred: Boolean (or just 1 or 0). Limits the results to starred
 136                         vs. unstarred conversations.
 137             label:      String. Limits the results to conversations having the
 138                         exact given label.
 139             subject:    String. Limits the results to conversations containing
 140                         the given string in their subject. (Case-insensitive.)
 141         """
 142         message_stubs = []
 143         for stub in self._get_message_stubs(folder='inbox'):
 144             if is_unread is not None and stub.is_unread != is_unread:
 145                 continue
 146             if is_starred is not None and stub.is_starred != is_starred:
 147                 continue
 148             if label is not None and label not in stub.label_list:
 149                 continue
 150             if subject is not None and stub.subject.lower().find(subject.lower()) == -1:
 151                 continue
 152             message_stubs.append(stub)
 153         return message_stubs
 154 
 155     def add_contact(self, name, email, notes=''):
 156         """
 157         Adds a contact with the given name, e-mail and notes to this Gmail
 158         account's address book. Raises ContactCouldNotBeDeleted on error.
 159         """
 160         p = self._get_page("https://gmail.google.com/gmail?view=address&act=a",
 161             post_data="at=%s&name=%s&email=%s&notes=%s&ac=Add+Contact&operation=Edit" % \
 162             (self._cookies['GMAIL_AT'].value, self._url_quote(name),
 163             self._url_quote(email), self._url_quote(notes)))
 164         if p.read().find(FAILURE_MESSAGE) > -1:
 165             raise ContactCouldNotBeAdded, "Gmail might have redesigned."
 166 
 167     def delete_contact(self, email):
 168         """
 169         Deletes the contact with the given e-mail address from this Gmail
 170         account's address book. Raises ContactCouldNotBeDeleted on error.
 171         """
 172         contact_index = None
 173         for i, c in enumerate(self.get_contacts()):
 174             if c[0] == email:
 175                 contact_index = i + 1
 176                 break
 177         if contact_index is None:
 178             raise ContactCouldNotBeDeleted, "The e-mail address '%s' wasn't in your Gmail address book." % email
 179         p = self._get_page("https://gmail.google.com/gmail?view=address&act=a",
 180             post_data="operation=Delete&at=%s&email%s=%s" % \
 181             (self._cookies['GMAIL_AT'].value, contact_index, urllib.quote_plus(email)))
 182         if p.read().find(FAILURE_MESSAGE) > -1:
 183             raise ContactCouldNotBeDeleted, "Gmail might have redesigned."
 184 
 185     def get_contacts(self, clear_cache=False):
 186         """
 187         Returns a list of lists representing all the contacts for this Gmail
 188         account, in the format ['email', 'contact name'].
 189         """
 190         if clear_cache or not self._contacts:
 191             p = self._get_page("https://gmail.google.com/gmail?view=page&name=contacts&zx=%s%s" % \
 192                 (self._js_version, self._get_random_int()))
 193             # The returned page contains only a JavaScript data structure that
 194             # looks like this:
 195             # [["jlennon@gmail.com","John Lennon"]
 196             # ,["billy@hotmail.com","Billy Shears"]
 197             # ,["percy@yahoo.com","Percy Thrillington"]
 198             # ]
 199             # Because this is exactly the same syntax as Python lists, we can
 200             # use an eval() on it to suck it into Python. THIS IS A SECURITY
 201             # RISK, THOUGH, because it blindly trusts Gmail's page isn't going
 202             # to include evil Python code.
 203             A=p.read()
 204             self.get_info(A)            
 205             self._contacts = eval(A)
 206         return self._contacts
 207 
 208     def _get_page(self, url, post_data=None):
 209         """
 210         Helper method that gets the given URL, handling the sending and storing
 211         of cookies. Returns the requested page as a file-like object in the
 212         format returned by urllib2.urlopen().
 213         """
 214         print url
 215         req = urllib2.Request(self._encode(url))
 216         if post_data is not None:
 217             req.add_data(self._encode(post_data))
 218         req.add_header('Cookie', self._encode(self._cookies.output(attrs=[], header='').strip()))
 219         req.add_header('Charset', 'utf-8')
 220         f = urllib2.urlopen(req)
 221         if f.headers.dict.has_key('set-cookie'):
 222             self._cookies.load(f.headers.dict['set-cookie'])
 223         return f
 224 
 225     def _get_random_int(self):
 226         """
 227         Helper method that returns a random number suitable for Gmail's "zx"
 228         query parameter, which is needed (required?) in some cases.
 229         """
 230         return random.randint(0, 1000000000)
 231 
 232     def _get_message_stubs(self, folder, offset=0):
 233         """
 234         Helper method that retrieves the given folder from a Gmail account
 235         and returns a list of Conversation objects, each representing a
 236         conversation in the folder. Saves its result in a cache the first time
 237         it's called.
 238 
 239         KNOWN LIMITATION: If a folder spans more than one page, this method
 240         will only return the messages on the first page.
 241         """
 242         if not self._folder_cache.has_key(folder):
 243             html = self._get_page("http://gmail.google.com/gmail?search=%s&view=tl&start=%d&init=1&zx=%s%s" % \
 244                 (folder, offset, self._js_version, self._get_random_int())).read()
 245             
 246             r = re.search('\nD\((\["ts",.*?\])\n\);', html)        
 247             self.ts=eval(r.groups()[0])
 248             r = re.search('\nD\((\["ds",.*?\])\n\);', html)        
 249             self.ds=eval(r.groups()[0])            
 250             r = re.search('\nD\((\["qu",.*?\])\n\);', html)        
 251             self.qu=eval(r.groups()[0])
 252             # We can use Python's eval() on the JavaScript source Gmail spits
 253             # out, because it's in Python-friendly list syntax. This IS a
 254             # slight security risk, of course.
 255             stub_list = []
 256             for match in EMAILS_RE.findall(html):
 257                 for msg_bits in eval(match)[1:]:
 258                     stub_list.append(Conversation(self, *msg_bits))
 259             self._folder_cache[folder] = stub_list
 260         return self._folder_cache[folder]
 261 
 262     def _get_raw_email(self, message_id):
 263         """
 264         Retrieves the message with the given message ID and returns it as an
 265         email.Message.Message. Saves its result in a cache the first time an
 266         e-mail is retrieved.
 267         """
 268         if not self._message_cache.has_key(message_id):
 269             f = self._get_page("http://gmail.google.com/gmail?view=om&th=%s&zx=%s%s" % \
 270                 (message_id, self._js_version, self._get_random_int()))
 271             self._message_cache[message_id] = email.message_from_string(f.read().lstrip())
 272         return self._message_cache[message_id]
 273 
 274     def _url_quote(self, value):
 275         """
 276         Helper method that quotes the given value for insertion into a query
 277         string. Also encodes into UTF-8, which Google uses, in case of
 278         non-ASCII characters.
 279         """
 280         value = self._encode(value)
 281         return urllib.quote_plus(value)
 282 
 283     def _encode(self, value):
 284         """
 285         Helper method. Google uses UTF-8, so convert to it, in order to allow
 286         non-ASCII characters.
 287         """
 288         if isinstance(value, unicode):
 289             value = value.encode("utf-8")
 290         return value
 291     def Close(self):
 292         p = self._get_page("http://gmail.google.com/gmail?logout")
 293         p.close()        
 294  
 295 class Conversation:
 296     """
 297     Represents the minimal information known about an conversation from
 298     scraping a Gmail folder list page and provides a way of retrieving the full
 299     messages.
 300     """
 301     def __init__(self, client, thread_id, is_unread, is_starred, date_html,
 302             authors_html, flags, subject_html, snippet_html, label_list,
 303             attach_html, matching_msgid, extra_snippet):
 304         self.client, self.thread_id = client, thread_id
 305         self.is_unread, self.is_starred = is_unread, is_starred
 306         self.date_html, self.authors_html = date_html, authors_html
 307         self.flags, self.subject = flags, subject_html
 308         self.snippet_html, self.label_list = snippet_html, label_list
 309         self.attach_html, self.matching_msgid = attach_html, matching_msgid
 310         self.extra_snippet = extra_snippet
 311         self._message_id_cache = []
 312 
 313     def __repr__(self):
 314         return self.subject
 315 
 316     def get_messages(self):
 317         """
 318         Returns a list of all messages in this conversation, in chronological
 319         order, as email.Message.Message objects.
 320         """
 321         if not self._message_id_cache:
 322             html = self.client._get_page("https://gmail.google.com/gmail?view=cv&search=inbox&th=%s&zx=%s%s" % \
 323                 (self.thread_id, self.client._js_version, self.client._get_random_int())).read()
 324             message_ids = []
 325             for match in MESSAGE_INFO_RE.findall(html):
 326                 # Note the eval(), which is a security risk.
 327                 message_ids.append(eval(match)[3])
 328             self._message_id_cache = message_ids
 329         return [self.client._get_raw_email(i) for i in self._message_id_cache]

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.

You are not allowed to attach a file to this page.