
In the code below I can get all of the data apart from the "Going Allowance", which sits in the resultsBlockFooter element. In the source, most of the data is wrapped in list items (li), but the going allowance is wrapped in a span, and I have tried different variations but just can't seem to extract it. I can't get data from span tags with BS. Suggestions appreciated.

import csv
from bs4 import BeautifulSoup
import requests

html = requests.get("http://www.sportinglife.com=156432").text

soup = BeautifulSoup(html,'lxml') 

rows = [] 
for header in soup.find_all("div", class_="resultsBlockHeader"): 
    track = header.find("div",  class_="track").get_text(strip=True).encode('ascii', 'ignore').strip("|") 
    date = header.find("div", class_="date").get_text(strip=True).encode('ascii', 'ignore').strip("|") 
    datetime = header.find("div", class_="datetime").get_text(strip=True).encode('ascii', 'ignore').strip("|") 
    grade = header.find("div", class_="grade").get_text(strip=True).encode('ascii', 'ignore').strip("|") 
    distance = header.find("div", class_="distance").get_text(strip=True).encode('ascii', 'ignore').strip("|") 
    prizes = header.find("div", class_="prizes").get_text(strip=True).encode('ascii', 'ignore').strip("|")

    # line1 holds the main result fields for each runner
    results = header.find_next_sibling("div", class_="resultsBlock").find_all("ul", class_="line1")
    details = []
    for result in results:
        fin = result.find("li", class_="fin").get_text(strip=True)
        greyhound = result.find("li", class_="greyhound").get_text(strip=True)
        trap = result.find("li", class_="trap").get_text(strip=True)
        sp = result.find("li", class_="sp").get_text(strip=True)
        timeSec = result.find("li", class_="timeSec").get_text(strip=True)
        timeDistance = result.find("li", class_="timeDistance").get_text(strip=True)

        details.append({"greyhound": greyhound, "sp": sp, "fin": fin, "timeSec": timeSec, "timeDistance": timeDistance, "trap": trap})

    # line2 holds the trainer for each runner
    results = header.find_next_sibling("div", class_="resultsBlock").find_all("ul", class_="line2")
    for index, result in enumerate(results):
        trainer = result.find("li", class_="trainer").get_text(strip=True)
        details[index]["trainer"] = trainer

    # line3 holds the race comment
    results = header.find_next_sibling("div", class_="resultsBlock").find_all("ul", class_="line3")
    for index, result in enumerate(results):
        comment = result.find("li", class_="comment").get_text(strip=True)
        details[index]["comment"] = comment

    results = header.find_next_sibling("div", class_="resultsBlock").find_all("ul", class_="line2")
    for index, result in enumerate(results):
        firstessential = result.find("li", class_="first essential").get_text(strip=True)
        details[index]["first essential"] = firstessential

    # this is the part that fails -- the going allowance sits in the footer
    results = header.find_next_sibling("div", class_="resultsBlockFooter").find_all("ul", class_="line3")
    for index, result in enumerate(results):
        goingAllowance = result.find("div", class_="Going Allowance").get_text(strip=True)
        details[index]["Going Allowance"] = goingAllowance

    for detail in details:
        detail.update({"track": track, "date": date, "datetime": datetime, "grade": grade, "distance": distance, "prizes": prizes})
        rows.append(detail)

with open("abc.csv", "a") as f:
    writer = csv.DictWriter(f, ["track", "date", "trap", "fin", "greyhound", "datetime", "sp", "grade", "distance", "prizes", "timeSec", "timeDistance", "trainer", "comment", "first essential", "Going Allowance"])

    for row in rows:
        writer.writerow(row)

Answer


For future reference, rather than posting all of your code, just include the relevant parts, and also include the html of the website (or the section of it) that you are having trouble capturing. I looked at the website, and I think this is what you meant:

test = soup.find("div", {"class":"resultsBlockFooter"}) 
'<div class="resultsBlockFooter"> 
<div><span>Going Allowance:</span> -10</div> 
<div><span>Forecast:</span> (3-4) £20.36 | <span>Tricast:</span> (3-4-2) £61.61</div> 
</div>' 

Specifically, this div: <div><span>Going Allowance:</span> -10</div>?

allowance = test.contents[1].text  # .contents is a handy list of the tag's children (whitespace text nodes included)
"Going Allowance: -10"
forecast, tricast = test.contents[3].text.split("|")  # the rest of the useful text
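
If you want the number on its own, you can also find the span by its label text and read the text node that follows it. A minimal, self-contained sketch against the footer html quoted above (the -10 comes from that snippet; I haven't re-checked it against the live page):

from bs4 import BeautifulSoup

footer_html = '''<div class="resultsBlockFooter">
<div><span>Going Allowance:</span> -10</div>
<div><span>Forecast:</span> (3-4) £20.36 | <span>Tricast:</span> (3-4-2) £61.61</div>
</div>'''
footer = BeautifulSoup(footer_html, "lxml").find("div", class_="resultsBlockFooter")

# find the span whose string is the label, then take the text node right after it
allowance = footer.find("span", text="Going Allowance:").next_sibling.strip()
print(allowance)  # prints: -10

The same find("span", text=...) call works on the test tag from your own soup, so you can drop the value straight into each dict in details inside your loop.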