Convert a news web page into an RSS feed

At the company I work for (BORM), we have an internal web page with the company news. We’re required to log in and check it every once in a while.

Now, to get notified automatically when there is news to discover, I hacked together a PHP script that logs in, downloads the page and parses the news. It then serves an RSS XML file to my feed reader.

In the past I didn’t do a lot of PHP scripting, only editing some bits and pieces, so this is my biggest PHP endeavor so far.

Tested with liferea and BeyondPod.

I won’t tell you where the script can be accessed, as the company news requires login. But the script itself is here:

<?php
/**
 * Serves the BORM intranet news page as an RSS 2.0 feed.
 *
 * Flow: fetch the login page to obtain a session cookie, POST the login form,
 * parse the second-to-last <table> of the returned HTML and turn its rows
 * into feed items. A row whose first cell contains a <strong> element starts
 * a new item (its text becomes the title); every other non-empty row is
 * appended to the description of the current item.
 *
 * Requires the curl and dom extensions.
 */

// --- Configuration ---------------------------------------------------------
// The URLs are blank in the published listing; fill them in before use.
$loginUrl   = ''; // page fetched first, then the target of the login POST
$refererUrl = ''; // Referer header sent with the login POST
$feedLink   = ''; // value of the <link> elements in the feed

// Prefer a password from the environment so it need not live in the source;
// fall back to the placeholder from the published listing.
$password = getenv('BORM_NEWS_PASSWORD');
if ($password === false || $password === '') {
    $password = 'mysecretpassword';
}

/**
 * Escapes text for use as XML character data or as HTML text/attribute value.
 *
 * @param string $text raw UTF-8 text
 * @return string escaped text
 */
function borm_escape($text)
{
    return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

/**
 * Aborts the request with a plain-text error instead of a broken feed.
 *
 * @param string $message human-readable reason
 * @return void (never returns; terminates the script)
 */
function borm_fail($message)
{
    if (!headers_sent()) {
        http_response_code(502);
        header('Content-Type: text/plain; charset=UTF-8');
    }
    echo $message, "\n";
    exit(1);
}

/**
 * Logs in and downloads the news page.
 *
 * @param string $loginUrl   URL of the login page / form target
 * @param string $refererUrl Referer header for the login POST
 * @param string $password   login password
 * @return string|false HTML of the page returned after login, false on failure
 */
function borm_fetch_news_html($loginUrl, $refererUrl, $password)
{
    if ($loginUrl === '') {
        return false;
    }

    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt_array($ch, [
        CURLOPT_URL            => $loginUrl,
        // An empty file name enables libcurl's in-memory cookie engine, so the
        // session cookie survives between the two requests on this handle
        // without leaving a temporary cookie file behind.
        CURLOPT_COOKIEFILE     => '',
        CURLOPT_USERAGENT      => 'php script to generate an rss feed for the BORM news',
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => false,
    ]);

    // First request: only needed to pick up the session cookie.
    if (curl_exec($ch) === false) {
        return false;
    }

    $fields = [
        'Schritt'  => '2',
        'Seite'    => 'index.asp',
        'sel_Adr'  => '150',
        'Passwort' => $password,
        'submit'   => 'Login',
    ];

    curl_setopt_array($ch, [
        CURLOPT_POST           => true,
        // Explicit '&' separator: arg_separator.output may be configured
        // differently, and http_build_query URL-encodes the password.
        CURLOPT_POSTFIELDS     => http_build_query($fields, '', '&'),
        CURLOPT_REFERER        => $refererUrl,
        CURLOPT_FOLLOWLOCATION => true,
    ]);

    // Second request: the login POST; its response is the news page.
    return curl_exec($ch);
}

/**
 * Extracts the news items from the downloaded HTML.
 *
 * @param string $html HTML of the news page
 * @return array|false list of ['title' => string, 'description' => string]
 *                     (description is an HTML fragment), false if the page
 *                     cannot be parsed or lacks the expected table
 */
function borm_parse_news_items($html)
{
    // Collect libxml warnings internally so sloppy HTML cannot leak PHP
    // warnings into the feed output.
    $previous = libxml_use_internal_errors(true);
    $dom      = new DOMDocument();
    $loaded   = $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    if (!$loaded) {
        return false;
    }

    // The news live in the second-to-last table of the page.
    $tables = $dom->getElementsByTagName('table');
    if ($tables->length < 2) {
        return false;
    }
    $newsTable = $tables->item($tables->length - 2);

    $items   = [];
    $current = -1; // index of the item currently being filled, -1 = none yet

    foreach ($newsTable->getElementsByTagName('tr') as $row) {
        $cols = $row->getElementsByTagName('td');
        if ($cols->length === 0) {
            continue;
        }

        $msgtext = (string) $cols->item(0)->nodeValue;
        if ($msgtext === '') {
            continue;
        }

        // A <strong> in the first cell marks a headline: start a new item.
        if ($cols->item(0)->getElementsByTagName('strong')->length > 0) {
            $items[] = ['title' => $msgtext, 'description' => ''];
            $current = count($items) - 1;
            continue;
        }

        // Body text before the first headline has no item to belong to.
        if ($current < 0) {
            continue;
        }

        $fragment = '';

        // Optional link in the second cell, emitted ahead of the row text.
        if ($cols->length > 1) {
            $anchors = $cols->item(1)->getElementsByTagName('a');
            if ($anchors->length > 0) {
                $anchor    = $anchors->item(0);
                $fragment .= '<a href="' . borm_escape($anchor->getAttribute('href')) . '">'
                    . borm_escape($anchor->textContent) . "</a><br />\n";
            }
        }

        $fragment .= borm_escape($msgtext) . "<br />\n";
        $items[$current]['description'] .= $fragment;
    }

    return $items;
}

/**
 * Renders the items as an RSS 2.0 document.
 *
 * @param array  $items    items as returned by borm_parse_news_items()
 * @param string $feedLink URL used for the channel and item <link> elements
 * @return string the complete XML document
 */
function borm_render_feed(array $items, $feedLink)
{
    $link = borm_escape($feedLink);

    $xml  = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
    $xml .= "<rss version=\"2.0\">\n";
    $xml .= "\t<channel>\n";
    $xml .= "\t\t<title>BormIntern</title>\n";
    $xml .= "\t\t<description>News aus dem BORM Point</description>\n";
    $xml .= "\t\t<language>de</language>\n";
    $xml .= "\t\t<link>{$link}</link>\n";
    // RSS dates must follow RFC 822: no dot after the day of month.
    $xml .= "\t\t<lastBuildDate>Thu, 10 Feb 2011 00:00:00 GMT</lastBuildDate>\n";

    foreach ($items as $item) {
        $xml .= "\t\t<item>\n";
        $xml .= "\t\t\t<title>" . borm_escape($item['title']) . "</title>\n";
        $xml .= "\t\t\t<link>{$link}</link>\n";
        $xml .= "\t\t\t<pubDate>Wed, 09 Feb 2011 00:00:00 GMT</pubDate>\n";
        // The guid is a hash of the title, not a URL, so say so explicitly.
        $xml .= "\t\t\t<guid isPermaLink=\"false\">" . md5($item['title']) . "</guid>\n";
        // The description is an HTML fragment; it must be escaped once more
        // to be carried as XML character data.
        $xml .= "\t\t\t<description>" . borm_escape($item['description']) . "</description>\n";
        $xml .= "\t\t</item>\n";
    }

    $xml .= "\t</channel>\n";
    $xml .= "</rss>\n";

    return $xml;
}

// --- Main ------------------------------------------------------------------
// Download and parse first, so that a failure never yields a half-written feed.
$output = borm_fetch_news_html($loginUrl, $refererUrl, $password);
if (!is_string($output) || $output === '') {
    borm_fail('failed to download the news page');
}

$items = borm_parse_news_items($output);
if ($items === false) {
    borm_fail('failed to parse the html');
}

header('Content-Type: application/xml; charset=UTF-8');
echo borm_render_feed($items, $feedLink);


Leave a Reply

Your email address will not be published. Required fields are marked *