Skip to main content
const fs = require('fs');
const axios = require('axios');
const cheerio = require('cheerio');
const Parser = require('rss-parser');

const parser = new Parser();
const rssUrl = 'https://feeds.bbci.co.uk/news/world/rss.xml';
const outputFile = 'index.html';

// Tunables that were previously inline magic numbers.
const maxArticles = 5; // Keep the number of scraped pages small to be polite to the server.
const maxContentLength = 2000; // Characters of article text kept per post.
const requestTimeoutMs = 15000; // Abort a stalled article request instead of hanging the run.
const refreshIntervalMs = 3600000; // 1 hour.

// Characters that must be replaced before text is placed in HTML text or
// double-quoted attribute values.
const htmlEscapes = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
};

/**
 * Collapse every run of whitespace into a single space and trim the ends.
 *
 * @param {*} text - Value to clean; null/undefined are treated as an empty string.
 * @returns {string} The cleaned text.
 */
const cleanText = (text) => (text == null ? '' : String(text)).replace(/\s+/g, ' ').trim();

/**
 * Escape a value so it can be safely interpolated into HTML.
 *
 * @param {*} value - Value to escape; null/undefined become an empty string.
 * @returns {string} The HTML-escaped string.
 */
function escapeHtml(value) {
  const text = value == null ? '' : String(value);
  return text.replace(/[&<>"']/g, (ch) => htmlEscapes[ch]);
}

/**
 * Resolve a possibly relative URL and accept it only if it is http(s).
 *
 * @param {*} candidate - URL string to resolve.
 * @param {string} [base] - Base URL used when `candidate` is relative.
 * @returns {string} The absolute URL, or '' when it cannot be parsed or uses
 *   another scheme (e.g. `javascript:` or `data:`).
 */
function toHttpUrl(candidate, base) {
  if (!candidate) {
    return '';
  }
  try {
    const resolved = new URL(String(candidate), base);
    return resolved.protocol === 'http:' || resolved.protocol === 'https:' ? resolved.href : '';
  } catch (error) {
    // An unparseable URL simply means "no usable URL" for our purposes.
    return '';
  }
}

/**
 * Download an article page and extract its paragraph text and a lead image.
 *
 * Never rejects: any failure is logged and reported through the returned
 * `content` string so one bad article does not abort the whole run.
 *
 * @param {string} url - Article URL to fetch.
 * @returns {Promise<{content: string, image: string}>} Text limited to
 *   `maxContentLength` characters and an absolute image URL ('' if none).
 */
async function scrapeArticle(url) {
  try {
    const { data } = await axios.get(url, {
      headers: { 'User-Agent': 'Mozilla/5.0' }, // Mimic a browser to avoid blocking.
      timeout: requestTimeoutMs,
    });
    const $ = cheerio.load(data);

    // BBC article content selectors (adjust based on BBC's HTML structure).
    const articleContent = $('article')
      .find('p')
      .map((i, el) => $(el).text())
      .get()
      .join(' ');
    const rawImage = $('img[src*="news"]').first().attr('src') || '';

    const cleanedContent = cleanText(articleContent).substring(0, maxContentLength);
    return {
      content: cleanedContent || 'No content available',
      // Relative and protocol-relative sources are resolved against the page URL
      // so they still work from the locally generated file.
      image: toHttpUrl(rawImage, url),
    };
  } catch (error) {
    console.error(`Error scraping ${url}: ${error.message}`);
    return { content: 'Error fetching content', image: '' };
  }
}

/**
 * Render one post as an HTML fragment with all dynamic values escaped.
 *
 * @param {{title: string, link: string, pubDate: string, content: string, image: string}} post
 * @returns {string} HTML for the post.
 */
function renderPost(post) {
  const title = escapeHtml(post.title);
  const imageUrl = toHttpUrl(post.image);
  const linkUrl = toHttpUrl(post.link);

  const published = new Date(post.pubDate);
  // A missing or unparseable date would otherwise render as "Invalid Date".
  const publishedText = Number.isNaN(published.getTime()) ? 'Unknown' : published.toLocaleString();

  const imageHtml = imageUrl ? `<img src="${escapeHtml(imageUrl)}" alt="${title}">` : '';
  const headingHtml = linkUrl ? `<a href="${escapeHtml(linkUrl)}">${title}</a>` : title;

  return `
    <article class="post">
      ${imageHtml}
      <h2>${headingHtml}</h2>
      <p class="date">Published: ${escapeHtml(publishedText)}</p>
      <p>${escapeHtml(post.content)}</p>
    </article>`;
}

/**
 * Build the complete blog page for a list of posts.
 *
 * NOTE(review): the markup of the original template was lost when this code
 * was pasted; the structure below is a minimal reconstruction that keeps the
 * same visible text (page title, per-post image, title, date and content).
 *
 * @param {Array<{title: string, link: string, pubDate: string, content: string, image: string}>} posts
 * @returns {string} A full HTML document.
 */
function generateHTML(posts) {
  return `<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>My News Blog</title>
</head>
<body>
  <h1>My News Blog</h1>
  ${posts.map(renderPost).join('')}
</body>
</html>
`;
}

/**
 * Fetch the RSS feed, scrape the first `maxArticles` articles one at a time,
 * and write the rendered page to `outputFile`.
 *
 * Never rejects: errors are logged so the hourly timer keeps running.
 *
 * @returns {Promise<void>}
 */
async function fetchAndGenerate() {
  try {
    const feed = await parser.parseURL(rssUrl);
    const items = (feed.items || []).slice(0, maxArticles);
    const posts = [];

    // Sequential on purpose: limits load on the remote server.
    for (const item of items) {
      const { content, image } = await scrapeArticle(item.link);
      posts.push({
        title: item.title,
        link: item.link,
        pubDate: item.pubDate,
        content,
        image,
      });
    }

    // Generate and save HTML.
    const html = generateHTML(posts);
    fs.writeFileSync(outputFile, html);
    console.log(`Blog posts generated successfully at ${outputFile}`);
  } catch (error) {
    console.error('Error:', error.message);
  }
}

// Run the script once at startup.
fetchAndGenerate();

// Optional: auto-run every hour.
setInterval(fetchAndGenerate, refreshIntervalMs);

Comments

Popular posts from this blog