Open main menu

Changes

no edit summary
[https://github.com/scrapinghub/splash Splash] is a JavaScript rendering service implemented in Python using Twisted and Qt. Splash can be [http://splash.readthedocs.org/en/latest/scripting-tutorial.html scripted]. So, using Splash with Portia, we should be able to visually scrape OKC.
 
 
== Basic scraping with JavaScript ==
<syntaxhighlight lang=javascript>
// Print the rendered text of every element matching
// h3.ud-accordion-panel-heading, one console line per element.
document.querySelectorAll('h3.ud-accordion-panel-heading').forEach(function(e) {
  console.log(e.innerText);
});

// Print the rendered text of every <span> whose data-purpose attribute is
// 'item-title', one console line per element.
document.querySelectorAll("span[data-purpose='item-title']").forEach(function(e) {
  console.log(e.innerText);
});
 
 
 
 
// Collect the whitespace-trimmed textContent of every <span> whose
// data-purpose attribute is 'item-title', then print the collected strings
// in a single console call, separated by newlines.
var subheadings = document.querySelectorAll("span[data-purpose='item-title']");
var subheadingTexts = [];
subheadings.forEach(function(node) {
  subheadingTexts.push(node.textContent.trim());
});
console.log(subheadingTexts.join("\n"));
 
 
// Collect the whitespace-trimmed textContent of every element matching
// h3.ud-accordion-panel-heading, then print the collected strings in a
// single console call, separated by newlines.
var headings = document.querySelectorAll('h3.ud-accordion-panel-heading');
// Array.from's second argument maps each node while the array is built.
var headingTexts = Array.from(headings, function(node) {
  return node.textContent.trim();
});
console.log(headingTexts.join('\n'));
</syntaxhighlight>
{{References}}
[[Category:Web]]
[[Category:JavaScript]]