const fs = require('fs');
const axios = require('axios');
const cheerio = require('cheerio');
const Parser = require('rss-parser');
// Single rss-parser instance; fetchAndGenerate uses it to download and parse the feed
const parser = new Parser();
// URL of the RSS feed whose items are listed and scraped
const rssUrl = 'https://feeds.bbci.co.uk/news/world/rss.xml';
// Path of the HTML file that each run overwrites with the generated page
const outputFile = 'index.html';
// Normalise whitespace: every run of whitespace characters becomes one space,
// and leading/trailing whitespace is dropped entirely.
const cleanText = (text) => {
  // Splitting on whitespace runs leaves empty strings at the edges when the
  // input starts or ends with whitespace; discarding them is the "trim".
  const words = text.split(/\s+/).filter((word) => word !== '');
  return words.join(' ');
};
/**
 * Download an article page and extract its paragraph text and a lead image URL.
 *
 * The function does not reject: any failure (network error, timeout, parsing
 * problem) is logged and reported through a placeholder result instead.
 *
 * @param {string} url - Address of the article page to fetch.
 * @param {number} [timeoutMs=10000] - Maximum time in milliseconds to wait for
 *   the HTTP response before the request is aborted.
 * @returns {Promise<{content: string, image: string}>} Up to 2000 characters of
 *   whitespace-normalised article text (or a placeholder message) and the `src`
 *   of the first matching image, or '' when none is found.
 */
async function scrapeArticle(url, timeoutMs = 10000) {
  try {
    const { data } = await axios.get(url, {
      headers: { 'User-Agent': 'Mozilla/5.0' }, // Mimic browser to avoid blocking
      // Without a timeout a stalled connection never settles, which would block
      // the caller's sequential loop indefinitely.
      timeout: timeoutMs
    });
    const $ = cheerio.load(data);
    // BBC article content selectors (adjust based on BBC's HTML structure)
    const articleContent = $('article')
      .find('p')
      .map((i, el) => $(el).text())
      .get()
      .join(' ');
    // First image whose src contains "news"; the attribute value is used as-is
    const mainImage = $('img[src*="news"]').first().attr('src') || '';
    const cleanedContent = cleanText(articleContent).substring(0, 2000); // Limit to 2000 chars
    return { content: cleanedContent || 'No content available', image: mainImage };
  } catch (error) {
    console.error(`Error scraping ${url}: ${error.message}`);
    return { content: 'Error fetching content', image: '' };
  }
}
/**
 * Render the scraped posts as one standalone HTML page.
 *
 * Every post field is HTML-escaped before interpolation because titles,
 * links, and article text come from remote pages and must not be able to
 * inject markup into the generated file.
 *
 * NOTE(review): the original markup of this template was lost (only the text
 * "My News Blog", "Comments" and "Post a Comment" survived), so the element
 * structure below is a minimal reconstruction — confirm against the intended
 * design.
 *
 * @param {Array<{title: string, link: string, pubDate: string, content: string, image: string}>} posts
 *   Posts to render, in display order.
 * @returns {string} The complete HTML document.
 */
function generateHTML(posts) {
  // Escape the five characters that are significant in HTML text and in
  // double- or single-quoted attribute values; null/undefined render as ''.
  const escapeHtml = (value) =>
    (value == null ? '' : String(value))
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');

  // One <article> per post; the <img> is emitted only when an image URL exists.
  const renderPost = (post) => `
<article>
<h2><a href="${escapeHtml(post.link)}">${escapeHtml(post.title)}</a></h2>
<p><small>${escapeHtml(post.pubDate)}</small></p>
${post.image ? `<img src="${escapeHtml(post.image)}" alt="${escapeHtml(post.title)}">` : ''}
<p>${escapeHtml(post.content)}</p>
</article>
`;

  return `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>My News Blog</title>
</head>
<body>
<h1>My News Blog</h1>
${posts.map(renderPost).join('')}
<section id="comments">
<h2>Comments</h2>
<h3>Post a Comment</h3>
</section>
</body>
</html>
`;
}
/**
 * Run one full generation pass: parse the RSS feed, scrape the first five
 * linked articles, render them with generateHTML, and write the page to
 * outputFile. Any error is logged rather than propagated, so the returned
 * promise always resolves.
 */
async function fetchAndGenerate() {
  try {
    const feed = await parser.parseURL(rssUrl);
    // Limit to 5 articles to avoid overwhelming the server
    const newest = feed.items.slice(0, 5);
    const posts = [];
    // Articles are scraped strictly one after another (each await finishes
    // before the next request starts).
    for (let index = 0; index < newest.length; index += 1) {
      const entry = newest[index];
      const scraped = await scrapeArticle(entry.link);
      posts.push({
        title: entry.title,
        link: entry.link,
        pubDate: entry.pubDate,
        content: scraped.content,
        image: scraped.image
      });
    }
    // Render the page and overwrite the previous output in one step
    fs.writeFileSync(outputFile, generateHTML(posts));
    console.log(`Blog posts generated successfully at ${outputFile}`);
  } catch (error) {
    console.error('Error:', error.message);
  }
}
// Kick off one generation pass immediately when the script starts
fetchAndGenerate();
// Optional: repeat the pass once per hour (60 min * 60 s * 1000 ms = 3600000 ms)
setInterval(fetchAndGenerate, 60 * 60 * 1000);
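// NOTE(review): the five lines below are displaced fragments of the HTML
// template that belongs inside generateHTML (the surrounding markup appears to
// have been stripped). They are not valid JavaScript at top level, so they are
// kept here only as a record, commented out:
//   ${post.image ? `
//   ` : ''}
//   `).join('')}
//   Comments
//   Post a Comment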