# Eczane


from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors.sgml import SgmlLinkExtractor
from scrapy.selector import HtmlXPathSelector
from scrapy.http.request import Request
from amiral.items import PhoneItem
import re
import sys

class EbaySpider(CrawlSpider):
    """Crawl eBay cell-phone listing pages and scrape every product page.

    Listing pages are walked through their "next" pagination link; each
    product link found on a listing page is requested and turned into a
    ``PhoneItem`` by :meth:`parseEbayDetail`.
    """

    name = "ebay"
    allowed_domains = ["www.ebay.com"]
    start_urls = ["https://www.ebay.com/sch/Cell-Phones-Smartphones-/9355/i.html"]

    # Value stored in item["source"].  The original code read
    # ``self.storeDomain`` without ever defining it (AttributeError).
    # NOTE(review): the crawled domain is used here -- confirm this is the
    # identifier downstream consumers expect.
    storeDomain = "www.ebay.com"

    # Items are handed to the JSON writer pipeline; a database-backed
    # pipeline (e.g. MongoDB) could be configured here instead.
    custom_settings = {
        'ITEM_PIPELINES': {
            'amiral.pipelines.JsonWriterPipeline': 800
        },
    }

    # NOTE(review): SgmlLinkExtractor is deprecated in Scrapy in favour of
    # scrapy.linkextractors.LinkExtractor -- consider migrating the import.
    rules = [
        # Follow the pagination link and parse every listing page reached.
        Rule(
            SgmlLinkExtractor(restrict_xpaths=('//a[@class="gspr next"]',)),
            callback='parseEbay',
            follow=True,
        ),
    ]

    # CSS queries tried in order for the price; sale/discount prices take
    # precedence over the regular price, as in the original override order.
    _PRICE_QUERIES = (
        "#mm-saleDscPrc::text",
        "#saleDscPrc::text",
        "span[itemprop=\"price\"]::text",
    )

    # Plain decimal number (thousands separators are stripped beforehand).
    _PRICE_RE = re.compile(r"\d+(?:\.\d+)?")

    def parse_start_url(self, response, **kwargs):
        """Parse the first listing page too.

        CrawlSpider only sends responses reached through ``rules`` to the
        rule callback; the start URL response goes to this hook instead, so
        without it the products on the first page would be skipped.
        """
        return self.parseEbay(response)

    def parseEbay(self, response):
        """Yield one detail-page ``Request`` per product link on a listing page."""
        links = response.css(".sresult h3.lvtitle a.vip::attr(href)").extract()
        for link in links:
            # urljoin leaves absolute URLs untouched and resolves relative ones.
            yield Request(url=response.urljoin(link), callback=self.parseEbayDetail)

    def parseEbayDetail(self, response):
        """Build and yield a ``PhoneItem`` from a product detail page.

        Fields that cannot be located on the page are set to ``None`` rather
        than aborting the whole item.

        NOTE(review): assumes ``PhoneItem`` declares the fields ``url``,
        ``image``, ``brand``, ``name``, ``price`` and ``source`` -- confirm
        against ``amiral.items``.
        """
        item = PhoneItem()
        item["url"] = response.url
        item["image"] = self._first(response, "img#icImg::attr(src)")
        item["brand"] = self._first(response, "h2[itemprop=\"brand\"] span::text")
        item["name"] = self._first(response, "h2[itemprop=\"model\"] span::text")
        item["price"] = self._extract_price(response)
        item["source"] = self.storeDomain

        self.logger.debug("Scraped %s (image: %s)", item["url"], item["image"])
        yield item

    @staticmethod
    def _first(response, query):
        """Return the first match of CSS ``query``, stripped, or ``None``."""
        matches = response.css(query).extract()
        return matches[0].strip() if matches else None

    def _extract_price(self, response):
        """Return the numeric price on the page as a string, or ``None``."""
        for query in self._PRICE_QUERIES:
            text = self._first(response, query)
            if not text:
                continue
            # Drop thousands separators so "1,299.99" is not cut off at "1".
            match = self._PRICE_RE.search(text.replace(",", ""))
            if match:
                return match.group(0)
        return None