python 3.x - How to get certain text from a url links -
I'm trying to get the statistics from the statistics box on each team's page. An example of such a page is at the hyperlink below. I'm trying to have it print out:
month : win % month : win % time: win%
But I'm not sure how to write the code, since the last piece of code I wrote in main is giving me an error.
http://www.gosugamers.net/counterstrike/teams/16448-nasty-gravy-runners
import re
import time

import requests
from bs4 import BeautifulSoup

# NOTE(review): the capitalisation of these labels on the live page could not
# be confirmed, so they are matched case-insensitively and with surrounding
# whitespace ignored.
_MEMBERS_LABEL = re.compile(r'^\s*members:\s*$', re.IGNORECASE)
_NEXT_LABEL = re.compile(r'^\s*next\s*$', re.IGNORECASE)
_WINRATE_LABEL = re.compile(r'^\s*winrate:\s*$', re.IGNORECASE)

# Teams listing fewer members than this are skipped.
_MIN_MEMBERS = 5
# Pause between listing-page requests, in seconds, to give the server a break.
_REQUEST_DELAY = 0.2


def _fetch_soup(url):
    """Download *url* and parse the response text with the stdlib HTML parser."""
    response = requests.get(url)
    return BeautifulSoup(response.text, 'html.parser')


def _member_count(team_link):
    """Return the member count found after *team_link*, or None if unavailable.

    Looks for the next ``th`` labelled "members:" and reads the first
    whitespace-separated token of its sibling ``td`` as an integer.
    """
    label = team_link.find_next('th', text=_MEMBERS_LABEL)
    if label is None:
        return None
    cell = label.find_next_sibling('td')
    if cell is None:
        return None
    try:
        return int(cell.text.split()[0])
    except (IndexError, ValueError):
        return None


def _next_page_url(soup, base):
    """Return the absolute URL of the next listing page, or None if there is none."""
    pager = soup.find('div', {'class': 'pages'})
    if pager is None:
        return None
    next_marker = pager.find('span', text=_NEXT_LABEL)
    if next_marker is None:
        return None
    anchor = next_marker.find_previous('a')
    if anchor is None or not anchor.get('href'):
        return None
    return base + anchor['href']


def _parse_percent(text):
    """Convert text such as ``'57%'`` to the integer ``57``.

    Raises:
        ValueError: if the text is not an integer with an optional ``%``.
    """
    return int(text.strip().rstrip('%'))


def get_all(url, base):
    """Yield the absolute URL of every team page reachable from *url*.

    Args:
        url: Address of the first team-listing page.
        base: Site root that is prepended to the relative links found in
            the listing and in the pagination block.

    Yields:
        ``base + href`` for each team link whose member count is at least
        ``_MIN_MEMBERS``. Teams whose member count cannot be read are
        yielded rather than dropped.
    """
    visited = set()  # guards against a pagination link that loops back
    page_url = url
    while page_url and page_url not in visited:
        visited.add(page_url)
        soup = _fetch_soup(page_url)

        for team_link in soup.select('div.details h3 a'):
            members = _member_count(team_link)
            if members is not None and members < _MIN_MEMBERS:
                continue
            yield base + team_link['href']

        page_url = _next_page_url(soup, base)
        if page_url:
            time.sleep(_REQUEST_DELAY)


if __name__ == '__main__':
    base_url = 'http://www.gosugamers.net'
    url = 'http://www.gosugamers.net/counterstrike/teams'

    for link in get_all(url, base_url):
        page = requests.get(link).content
        soup = BeautifulSoup(page, 'html.parser')

        for row in soup.select('div.statistics tr'):
            # Only the win-rate row carries this label; other rows are skipped
            # instead of dereferencing a missing element.
            label = row.find('th', text=_WINRATE_LABEL)
            if label is None:
                continue
            cell = label.find_next_sibling('td')
            if cell is None:
                continue
            # Convert the cell's text (e.g. "0%"), not the Tag object itself.
            print(_parse_percent(cell.text))
Not sure exactly what you want, but this gets the team stats:
import requests
from bs4 import BeautifulSoup, Tag

# Team page whose statistics table is scraped.
TEAM_URL = "http://www.gosugamers.net/counterstrike/teams/16448-nasty-gravy-runners"


def _cell_texts(row):
    """Return the stripped text of each element child of *row*.

    Iterating a Tag also yields the whitespace strings between its children,
    so only Tag instances are kept. A missing row gives an empty list.
    """
    if row is None:
        return []
    return [child.text.strip() for child in row if isinstance(child, Tag)]


# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed.
soup = BeautifulSoup(requests.get(TEAM_URL).content, "html.parser")

table = soup.select_one("table.stats-table")
if table is None:
    raise SystemExit("no table.stats-table found on " + TEAM_URL)

# Header cells of the row marked "header"; cells with no text are dropped.
head1 = [th.text.strip() for th in table.select("tr.header th") if th.text]
# First row that directly follows another row.
head2 = _cell_texts(table.select_one("tr + tr"))
# First row that directly follows two consecutive rows.
scores = _cell_texts(table.select_one("tr + tr + tr"))

print(head1, head2, scores)
output:
([u'jun', u'may', u'all time'], [u'winrate:', u'0%', u'0%', u'0%'], [u'matches played:', u'0 / 0 / 0', u'0 / 0 / 0', u'0 / 0 / 0'])
Comments
Post a Comment