Spiders: Difference between revisions
No edit summary |
No edit summary |
||
| Line 12: | Line 12: | ||
[https://github.com/scrapinghub/splash Splash] is a JavaScript rendering service implemented in Python using Twisted and QT. Splash can be [http://splash.readthedocs.org/en/latest/scripting-tutorial.html scripted]. So, using Splash with Portia, we should be able to visually scrape OKC. | [https://github.com/scrapinghub/splash Splash] is a JavaScript rendering service implemented in Python using Twisted and QT. Splash can be [http://splash.readthedocs.org/en/latest/scripting-tutorial.html scripted]. So, using Splash with Portia, we should be able to visually scrape OKC. | ||
== Basic scraping with JavaScript == | |||
<syntaxhighlight lang=javascript> | |||
// Basic DOM scraping, intended to be pasted into the browser console on the
// page being scraped. Each statement below is independent of the others.

// Log the rendered text of every element matching
// 'h3.ud-accordion-panel-heading', one console line per element.
document.querySelectorAll('h3.ud-accordion-panel-heading').forEach((heading) => {
  console.log(heading.innerText);
});

// Log the rendered text of every <span data-purpose="item-title">,
// one console line per element.
document.querySelectorAll("span[data-purpose='item-title']").forEach((item) => {
  console.log(item.innerText);
});

// Same item-title elements, but collected into an array of trimmed strings and
// printed as a single newline-separated block. Array.from is used because
// querySelectorAll returns a NodeList, which has no map() method.
const subheadings = document.querySelectorAll("span[data-purpose='item-title']");
const subheadingTexts = Array.from(subheadings).map((subheading) => subheading.textContent.trim());
console.log(subheadingTexts.join("\n"));

// Same heading elements as the first loop, collected and printed as one
// newline-separated block.
const headings = document.querySelectorAll('h3.ud-accordion-panel-heading');
const headingTexts = Array.from(headings).map((heading) => heading.textContent.trim());
console.log(headingTexts.join('\n'));
</syntaxhighlight> | |||
{{References}} | {{References}} | ||
[[Category:Web]] | [[Category:Web]] | ||
[[Category:JavaScript]] | |||