"""
A python script to scrape the target.com mobile site, looking for iPad 2s.

Author: PK Shiu
pk @ pkshiu.com
http://www.imperial-consulting.com

Credits:
- original list of links are from postings on macrumors.com

This work is licensed under a
Creative Commons Attribution-NonCommercial 3.0 Unported License.
http://creativecommons.org/licenses/by-nc/3.0/

Usage: set MY_ZIP below, then run the script. Each product in ITEMS is
looked up in turn and every store that is not marked 'Out Of Stock' is
printed together with a link to its detail page.

Note: this is Python 2 code (urllib2, BeautifulSoup 3). The print calls use
the single-argument parenthesised form, which behaves the same as the
Python 2 print statement.
"""
import urllib
import urllib2

from BeautifulSoup import BeautifulSoup

# Change this to your zip
MY_ZIP = '02118'

# Form target of the "find it at a store" search.
SEARCH_URL = ('http://sites.target.com/site/en/spot/mobile_fiats_results.jsp'
              '?_DARGS=/site/en/spot/mobile_fiats.jsp')

# Prefix joined with the per-store href found in the results page.
DETAIL_URL_TEMPLATE = 'http://sites.target.com/site/en/spot/%s'

# Text whose presence in a store link marks that store as having no stock.
OUT_OF_STOCK_MARKER = 'Out Of Stock'

# (label, product number) pairs to look up. The label is only used in the
# printed output; the number is sent as the 'dpci' form field.
ITEMS = (
    ('black 16g', '057-10-1830'),
    ('black 32g', '057-10-1831'),
    ('black 64g', '057-10-1832'),
    ('black 16g att', '057-10-1833'),
    ('black 32g att', '057-10-1834'),
    ('black 64g att', '057-10-1835'),
    ('black 16g vzw', '057-10-1836'),
    ('black 32g vzw', '057-10-1837'),
    ('black 64g vzw', '057-10-1838'),
    # NOTE(review): this entry previously reused 057-10-1838 (the number of
    # 'black 64g vzw'), so 'white 16g' was never actually queried. 1839 is
    # the only number missing from the otherwise consecutive run -- confirm
    # against the retailer's listing.
    ('white 16g', '057-10-1839'),
    ('white 32g', '057-10-1840'),
    ('white 64g', '057-10-1841'),
    ('white 16g att', '057-10-1842'),
    ('white 32g att', '057-10-1843'),
    ('white 64g att', '057-10-1844'),
    ('white 16g vzw', '057-10-1845'),
    ('white 32g vzw', '057-10-1846'),
    ('white 64g vzw', '057-10-1847'),
)


def parse(product_name, fd):
    """
    Use BeautifulSoup to scrape the resulting HTML and report stock.

    product_name -- label for the product, used only in the printed output.
    fd -- the results page; passed unchanged to BeautifulSoup (go() hands in
          the open HTTP response).

    Prints one 'trying ...' line per store entry and, for each entry whose
    link does not contain the out-of-stock marker, the detail page URL.
    Returns None.
    """
    soup = BeautifulSoup(fd)

    # find ul class="itemList"
    store_list = soup.find('ul', {'class': 'itemList'})
    if store_list is None:
        # find() returns None when the page has no such list; report it and
        # move on instead of crashing the whole run with an AttributeError.
        print('no store list found for %s' % product_name)
        return

    # now extract just the li's, ignoring spaces etc.
    for li in store_list.findAll('li'):
        link = li.a
        name_tag = li.strong
        if link is None or name_tag is None:
            # not a store entry (no link or no store name): nothing to report
            continue
        href = link.get('href')
        if href is None:
            continue

        # .string is None when <strong> holds anything but a single text node
        store_name = (name_tag.string or '').strip()
        print('trying %s at %s...' % (product_name, store_name))

        if OUT_OF_STOCK_MARKER not in href:
            print('GOT ONE !! copy and paste this link to see more details:')
            print(DETAIL_URL_TEMPLATE % href)


def go(product_name, dcpi, zipcode=MY_ZIP):
    """
    Lookup one product.

    product_name -- label for the product, forwarded to parse() for output.
    dcpi -- product number, sent to the site as the 'dpci' form field (the
            parameter name is kept as-is for existing callers).
    zipcode -- zip code to search around; defaults to MY_ZIP.

    Raises whatever urllib2.urlopen raises on a network or HTTP failure.
    Returns None.
    """
    params = urllib.urlencode({
        '_dyncharset': 'ISO-8859-1',
        'zipcode': zipcode,
        'asin': '',
        'dpci': dcpi,
        'city': '',
        'state': '',
    })
    req = urllib2.Request(SEARCH_URL)
    # Supplying a data argument makes urllib2 send the request as a POST.
    fd = urllib2.urlopen(req, params)
    try:
        parse(product_name, fd)
    finally:
        # always release the connection, even if parsing fails
        fd.close()


def main():
    """ Look up every product listed in ITEMS, in order. """
    for product_name, sku in ITEMS:
        go(product_name, sku)


if __name__ == '__main__':
    main()