1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
# The <div> whose class attribute is 'btn-group btn-group-paging'.
paging_div = soup.find('div', 'btn-group btn-group-paging')

# Fields of a single entry `d` (bound outside this snippet).
push_str = d.find('div', 'nrec').text
# Look the entry's first <a> up once; it supplies both the href and the title.
title_link = d.find('a')
href = title_link['href']
title = title_link.text
# The author <div> may be missing; fall back to an empty string in that case.
author_div = d.find('div', 'author')
author = author_div.text if author_div else ''
# Every <div> carrying the CSS class 'r-ent'.
divs = soup.find_all('div', 'r-ent')
for d in divs:
    # The loop body was elided ('.....') in the original listing.
    ...
# Two spellings of the same lookup: the first <h4> in the document.
print(soup.h4)
print(soup.find("h4"))
# Text content of that first <h4>.
print(soup.find("h4").text)
# Text of the <a> nested inside the first <h4>.
print(soup.h4.a.text)
# Print the text of the <a> inside each tag in h4_tags (bound outside this snippet).
# NOTE(review): the original line breaks were lost in this listing. It is unclear
# whether the trailing print(soup.h4.a.text) is part of the loop body or a separate
# statement after it — confirm against the original before reformatting.
for h4 in h4_tags: print(h4.a.text) print(soup.h4.a.text)
# Every <h4> carrying the CSS class 'card-title'.
h4_tags = soup.find_all('h4', 'card-title')
# Print the text of the <a> nested in each of them.
for h4 in h4_tags:
    print(h4.a.text)
# Label line, then the stripped text of the element with id="mac-p".
# The surrounding dashes make any leftover whitespace visible.
print('soup.find(id="mac-p").text.strip() : ')
print('-' + soup.find(id='mac-p').text.strip() + '-')
print('===============================')
# Search by the custom attribute data-foo="mac-foo".
# NOTE(review): the tag-name argument is an empty string; presumably it is meant
# to match any tag — confirm against the Beautiful Soup version in use.
print(soup.find_all('', {'data-foo': 'mac-foo'}))
# Every <div> carrying the CSS class 'content'.
divs = soup.find_all('div', 'content')
# For each one, print the stripped text of its first <h6>, <h4> and <p>.
for div in divs:
    print(div.h6.text.strip(), div.h4.text.strip(), div.p.text.strip())
# Print, per <div>, the list of its whitespace-stripped text fragments.
for div in divs:
    print(list(div.stripped_strings))
# Step from `link` up to its parent, across to the node immediately before that
# parent, and take that node's text.
# NOTE(review): presumably that preceding node is the price cell — confirm
# against the page markup.
price_node = link.parent.previous_sibling
price = price_node.text
# Materialise the row's direct children so they can be addressed by position.
all_tds = list(row.children)
# href of the <a> inside the fourth child, or None when that <a> has no href.
href = all_tds[3].a.attrs.get('href')
# Find the <a> inside the <li class="nexttxt"> element and print its href.
# NOTE(review): the name `next` shadows the builtin next(); it is kept here
# because code outside this snippet may read it.
next_item = soup.find('li', {'class': 'nexttxt'})
next = next_item.find('a')
print('next url = ' + next['href'])
# Build the `item` dict for one `movie` element from its release_info <div>.
# Nothing is built when that <div> is absent.
movie_info = movie.find('div', {'class': 'release_info'})
if movie_info:
    # The title link supplies both 'ch_name' and the trailer URL, so look it up once.
    name_link = movie_info.find('div', {'class': 'release_movie_name'}).find('a')
    trailer_url = unquote(name_link['href'], 'utf-8')

    # The leveltext <div> is optional; keep only the first line of its text.
    level_div = movie_info.find('div', {'class': 'leveltext'})
    expectation = level_div.text.strip().split('\n')[0] if level_div else ""

    item = {
        'ch_name': name_link.text.strip(),
        'en_name': movie_info.find('div', {'class': 'en'}).find('a').text.strip(),
        'expectation': expectation,
        'intro': movie_info.find('div', {'class': 'release_text'}).find('span').text.strip(),
        'poster_url': movie.find('div', {'class': 'release_foto'}).find('img')['data-src'],
        # Last space-separated token of the release_movie_time text.
        'release_date': movie_info.find('div', {'class': 'release_movie_time'}).text.strip().split(' ')[-1],
        'trailer_url': trailer_url,
    }
|