computerdeals
get products
create spider
1 2 3
| (myenv10_scrapy) D:\work\run\python_crawler\107-selenium\silkdeals>scrapy genspider computerdeals slickdeals.net/computer-deals Created spider 'computerdeals' using template 'basic' in module: silkdeals.spiders.computerdeals
|
computerdeals.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
| import scrapy from scrapy_selenium import SeleniumRequest
class ComputerdealsSpider(scrapy.Spider):
    """Scrape product deals from the slickdeals.net computer-deals listing.

    Uses SeleniumRequest so the JavaScript-rendered deal tiles are present
    in the response before parsing.
    """

    name = 'computerdeals'

    def start_requests(self):
        """Open the listing page in a Selenium-driven browser.

        wait_time=3 gives the page time to render the deal grid before
        the response is handed to parse().
        """
        yield SeleniumRequest(
            url='https://slickdeals.net/computer-deals/',
            wait_time=3,
            callback=self.parse,
        )

    def parse(self, response):
        """Yield one dict per deal tile: name, absolute link, store, price."""
        products = response.xpath(
            "//ul[@class='dealTiles categoryGridDeals blueprint']/li"
        )
        for product in products:
            href = product.xpath(
                ".//a[@class='itemTitle bp-p-dealLink bp-c-link']/@href"
            ).get()
            yield {
                'name': product.xpath(
                    ".//a[@class='itemTitle bp-p-dealLink bp-c-link']/text()"
                ).get(),
                # response.urljoin resolves the root-relative href against the
                # site origin. The original concatenated
                # 'https://slickdeals.net/computer-deals' + href, which
                # duplicated the path segment and raised TypeError when the
                # href was missing (None).
                'link': response.urljoin(href) if href else None,
                # The original predicate button['itemStore ...'] is a
                # constant-true string predicate in XPath, not a class match;
                # compare the @class attribute explicitly instead.
                'store_name': product.xpath(
                    ".//span[@class='blueprint']/button[@class='itemStore "
                    "bp-p-storeLink bp-c-linkableButton bp-c-button js-button "
                    "bp-c-button--link']/text()"
                ).get(),
                'price': product.xpath(
                    "normalize-space(.//div[@class='itemPrice wide ']/text())"
                ).get(),
            }
|
run
1
| (myenv10_scrapy) D:\work\run\python_crawler\107-selenium\silkdeals>scrapy crawl computerdeals
|
computerdeals.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
| import scrapy from scrapy_selenium import SeleniumRequest
class ComputerdealsSpider(scrapy.Spider):
    """Scrape slickdeals.net computer deals, following pagination.

    Uses SeleniumRequest so the JavaScript-rendered deal tiles are present
    in the response before parsing.
    """

    name = 'computerdeals'

    def remove_characters(self, value):
        """Normalize non-breaking spaces in a scraped string.

        Returns None unchanged so callers can pass `.get()` results directly.
        The original `value.strip('\\xa0')` removed NBSPs only at the string
        ends and raised AttributeError when the XPath matched nothing.
        """
        if value is None:
            return None
        return value.replace('\xa0', ' ').strip()

    def start_requests(self):
        """Open the first listing page in a Selenium-driven browser."""
        yield SeleniumRequest(
            url='https://slickdeals.net/computer-deals/',
            wait_time=3,
            callback=self.parse,
        )

    def parse(self, response):
        """Yield one dict per deal tile, then follow the next-page link."""
        products = response.xpath(
            "//ul[@class='dealTiles categoryGridDeals blueprint']/li"
        )
        for product in products:
            href = product.xpath(
                ".//a[@class='itemTitle bp-p-dealLink bp-c-link']/@href"
            ).get()
            yield {
                'name': product.xpath(
                    ".//a[@class='itemTitle bp-p-dealLink bp-c-link']/text()"
                ).get(),
                # urljoin resolves the root-relative href against the site
                # origin; the original string concatenation duplicated the
                # '/computer-deals' path segment and raised TypeError on a
                # missing href.
                'link': response.urljoin(href) if href else None,
                # button['itemStore ...'] was a constant-true string
                # predicate; match the @class attribute explicitly.
                'store_name': self.remove_characters(product.xpath(
                    "normalize-space(.//span[@class='blueprint']/"
                    "button[@class='itemStore bp-p-storeLink "
                    "bp-c-linkableButton bp-c-button js-button "
                    "bp-c-button--link']/text())"
                ).get()),
                'price': product.xpath(
                    "normalize-space(.//div[@class='itemPrice wide ']/text())"
                ).get(),
            }

        # Pagination: query the response directly. The original queried the
        # loop variable `product` after the loop with an absolute '//' path,
        # which worked only by accident (absolute paths search the whole
        # document) and failed with NameError on an empty listing.
        next_page = response.xpath("//a[@data-role='next-page']/@href").get()
        if next_page:
            yield SeleniumRequest(
                url=response.urljoin(next_page),
                wait_time=3,
                callback=self.parse,
            )
|
run
1
| (myenv10_scrapy) D:\work\run\python_crawler\107-selenium\silkdeals>scrapy crawl computerdeals
|