Saturday, 9 July 2016

javascript - NodeJS: Trouble scraping two URLs with promises



I'm scraping r/theonion and writing the titles to a text file, onion.txt. After that, I intend to scrape r/nottheonion and write its titles to another text file, not_onion.txt. I succeed in writing to onion.txt, but not to not_onion.txt.




// Subreddit listing pages to scrape.
var onion_url = "https://www.reddit.com/r/theonion";
var not_onion_url = "https://www.reddit.com/r/nottheonion";

// First stage: fetch r/theonion, log every post title and append it to onion.txt.
// NOTE(review): neither `resolve` nor `reject` is ever called inside this executor,
// so `promise` never settles and handlers attached to it with .then() never run.
// NOTE(review): `request`, `cheerio` and `fs` are not required in this snippet --
// presumably loaded earlier in the file; confirm.
var promise = new Promise(function(resolve, reject) {

request(onion_url, function(error, response, html) {
if (error) {
// The error is only logged; execution still continues to cheerio.load(html) below.
console.log("Error: " + error);
}


var $ = cheerio.load(html);

// Visit each matched element and take the trimmed text of its 'p.title > a.title' descendant.
$("div#siteTable > div.link").each(function(idx) {
var title = $(this).find('p.title > a.title').text().trim();
console.log(title);

// Called without a callback, so completion and write errors are not observed here.
fs.appendFile('onion.txt', title + '\n');
});
});
});


// Second stage: runs only once `promise` is fulfilled. Fetches r/nottheonion,
// logs every post title and appends it to not_onion.txt.
promise.then(function(result) {
  request(not_onion_url, function(error, response, html) {
    if (error) {
      // The error is only logged; execution still continues to cheerio.load(html) below.
      console.log("Error: " + error);
    }

    var $ = cheerio.load(html);

    $("div#siteTable > div.link").each(function(idx) {

      // Trimmed text of the title anchor inside the current matched element.
      var title = $(this).find('p.title > a.title').text().trim();
      console.log(title);

      // Called without a callback, so completion and write errors are not observed here.
      fs.appendFile('not_onion.txt', title + '\n');
    });
  });
}, function(err) {
  // Rejection handler for `promise`: logs a fixed message, `err` itself is not printed.
  console.log("Error with scraping r/nottheonion");
});


Answer



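Your second request never runs because the first promise is never settled: resolve() is never called inside its executor, so the callback passed to then() never fires.
If you want to use promises anyway, use request-promise and fs-promise to simplify your code, and use a function so you don't repeat yourself.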



// `rp(url)` is chained with .then() below to fetch each page;
// `fsp.appendFile` performs the append to the output files.
// NOTE(review): `cheerio` is used by parse() below but is not required in this
// snippet -- presumably it is already in scope; confirm.
var rp = require('request-promise');
var fsp = require('fs-promise');

// Subreddit listing pages to scrape.
var onion_url = "https://www.reddit.com/r/theonion";
var not_onion_url = "https://www.reddit.com/r/nottheonion";


/**
 * Extract the post titles from a subreddit listing page.
 *
 * Each title is logged to the console as it is found.
 *
 * @param {string} html - Markup of the listing page, passed to cheerio.load.
 * @returns {string} Every trimmed title followed by a newline, in the order
 *   the elements were matched; an empty string when nothing matches.
 */
function parse(html) {
  const $ = cheerio.load(html);
  const lines = [];

  // cheerio passes (index, element) to the callback; the element is the same
  // object that would otherwise be bound to `this`.
  $("div#siteTable > div.link").each((position, row) => {
    const heading = $(row).find('p.title > a.title').text().trim();
    console.log(heading);
    lines.push(heading + '\n');
  });

  return lines.join('');
}


/**
 * Build a writer bound to a single output file.
 *
 * @param {string} file - Path handed to fsp.appendFile.
 * @returns {Function} Function taking the content to append and returning
 *   whatever fsp.appendFile(file, content) returns.
 */
var append = function (file) {
  return function (content) {
    return fsp.appendFile(file, content);
  };
};

/**
 * Fetch one listing page, extract its titles with parse() and append them to `file`.
 * Logs 'Success' when the chain completes, or 'Error:' with the failure otherwise.
 *
 * @param {string} url - Page to fetch with rp.
 * @param {string} file - Output file passed to append().
 * @returns {*} The end of the promise chain (its rejection is already handled).
 */
function scrapeTo(url, file) {
  return rp(url)
    .then(parse)
    .then(append(file))
    .then(() => console.log('Success'))
    .catch(err => console.log('Error:', err));
}

// Both scrapes are started back to back; neither waits for the other.
scrapeTo(onion_url, 'onion.txt');
scrapeTo(not_onion_url, 'not_onion.txt');


This is not tested.


No comments:

Post a Comment

c++ - Does curly brackets matter for empty constructor?

Those brackets declare an empty, inline constructor. In that case, with them, the constructor does exist, it merely does nothing more than t...