Aşağıdaki kodda, resultsBlockFooter öğesindeki "Going Allowance" değeri dışında verilerin tümünü elde edebiliyorum. Kaynakta verilerin çoğu liste öğelerinde (li) yer alıyor, ancak "Going Allowance" değeri bir span etiketiyle çevrili. Farklı varyasyonlar denedim ama yalnızca bu değeri çıkarmayı başaramadım. Önerileriniz için şimdiden teşekkürler. BeautifulSoup (BS) ile span etiketlerinden veri alamıyorum.
import csv
from bs4 import BeautifulSoup
import requests
# Scrape greyhound race results from one page and append one CSV row per
# runner to OUTPUT_PATH.
#
# NOTE(review): the URL below looks truncated (nothing precedes "=156432"
# except the host name). Only the missing closing quote was restored here;
# confirm the full address before running.
RESULTS_URL = "http://www.sportinglife.com=156432"
OUTPUT_PATH = "abc.csv"
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests may wait forever
GOING_LABEL = "Going Allowance"

# Race-level values; each name is both the CSS class of the <div> inside the
# header block and the key under which the value is stored in every row.
HEADER_FIELDS = ("track", "date", "datetime", "grade", "distance", "prizes")

# CSV column order. Every name must match a row key exactly, because
# csv.DictWriter raises ValueError for row keys it does not know.
FIELDNAMES = [
    "track",
    "date",
    "trap",
    "fin",
    "greyhound",
    "datetime",
    "sp",
    "grade",
    "distance",
    "prizes",
    "timeSec",
    "timeDistance",
    "trainer",
    "comment",
    "first essential",
    "Going Allowance",
]


def _text(parent, tag, css_class):
    """Return the stripped text of the first matching element, or ""."""
    node = parent.find(tag, class_=css_class)
    return node.get_text(strip=True) if node is not None else ""


def _header_text(header, css_class):
    """Return a header field reduced to ASCII with surrounding "|" removed."""
    raw = _text(header, "div", css_class)
    # Decode back to text: on Python 3, bytes.strip("|") raises TypeError.
    return raw.encode("ascii", "ignore").decode("ascii").strip("|")


def _going_allowance(footer):
    """Return the going-allowance text found in *footer*, or "" if not found.

    The value is located through its visible label rather than a CSS class.
    NOTE(review): this assumes the value either follows the label inside the
    same text node or is the node right after the label (or right after the
    element wrapping the label, reportedly a <span>) -- confirm against the
    live markup.
    """
    if footer is None:
        return ""
    label = footer.find(string=lambda s: s is not None and GOING_LABEL in s)
    if label is None:
        return ""

    value = str(label).partition(GOING_LABEL)[2]
    if not value.strip(": \t\r\n"):
        # Nothing after the label in its own text node: try what follows it.
        sibling = label.next_sibling
        if sibling is None:
            sibling = label.parent.next_sibling
        if sibling is None:
            return ""
        value = str(sibling) if isinstance(sibling, str) else sibling.get_text()
    return value.strip().lstrip(":").strip()


def _parse_race(header):
    """Return one dict per runner for the race introduced by *header*.

    Returns an empty list when no "resultsBlock" sibling follows the header.
    Elements missing from the page yield empty strings instead of raising.
    """
    race = {name: _header_text(header, name) for name in HEADER_FIELDS}

    block = header.find_next_sibling("div", class_="resultsBlock")
    if block is None:
        return []

    runners = [
        {
            "greyhound": _text(line, "li", "greyhound"),
            "sp": _text(line, "li", "sp"),
            "fin": _text(line, "li", "fin"),
            "timeSec": _text(line, "li", "timeSec"),
            "timeDistance": _text(line, "li", "timeDistance"),
            "trap": _text(line, "li", "trap"),
        }
        for line in block.find_all("ul", class_="line1")
    ]

    # line2/line3 rows are paired with runners by position; zip() stops at
    # the shorter sequence, so surplus rows cannot cause an IndexError.
    for runner, line in zip(runners, block.find_all("ul", class_="line2")):
        runner["trainer"] = _text(line, "li", "trainer")
        runner["first essential"] = _text(line, "li", "first essential")
    for runner, line in zip(runners, block.find_all("ul", class_="line3")):
        runner["comment"] = _text(line, "li", "comment")

    # The going allowance is read once per race and copied to every runner.
    footer = header.find_next_sibling("div", class_="resultsBlockFooter")
    going = _going_allowance(footer)
    for runner in runners:
        runner["Going Allowance"] = going
        runner.update(race)
    return runners


def main():
    """Fetch the results page, parse every race and append the rows to CSV.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    response = requests.get(RESULTS_URL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "lxml")

    rows = []
    for header in soup.find_all("div", class_="resultsBlockHeader"):
        rows.extend(_parse_race(header))

    # newline="" is what the csv module expects of files it writes to.
    # No header row is written because the file is opened in append mode.
    with open(OUTPUT_PATH, "a", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, FIELDNAMES)
        writer.writerows(rows)


if __name__ == "__main__":
    main()