|
|
|
|
|
|
| |
find url links in a page
|
<?php
// findlinks.php
// php code example: find links in an html page
// mallsop.com 2006 gpl
//
// Flow: on first load a form asks for a file or URL. When the form is
// submitted, the resource is read, its <title> and every <a href="..."> are
// extracted, relative links are resolved against the submitted location, and
// a summary is printed. Everything that originates from the user or from the
// fetched page is HTML-escaped before it is echoed.

/**
 * Escape a value for safe output as HTML text or inside a quoted attribute.
 *
 * @param mixed $text value to print (cast to string)
 * @return string escaped text
 */
function findlinks_escape($text)
{
    // ENT_SUBSTITUTE keeps pages that are not valid UTF-8 from collapsing to "".
    return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

/**
 * Read a whole resource (local path or URL) into memory.
 *
 * @param string $location file path or URL, as typed by the user
 * @return string|false the contents, or false when it cannot be opened/read
 */
function findlinks_fetch($location)
{
    // NOTE(review): $location is user-supplied, so this can read local files
    // and make the server issue arbitrary requests. The form is labelled
    // "File or URL", so this looks intentional for a demo; restrict it to an
    // allow-list of schemes/hosts before exposing the script publicly.
    // "@" is kept on purpose: fopen() emits a warning on failure and the
    // false return value is handled explicitly by the caller.
    $handle = @fopen($location, 'r');
    if ($handle === false) {
        return false;
    }
    $content = stream_get_contents($handle);
    fclose($handle);
    return $content;
}

/**
 * Extract the text of the first <title> element.
 *
 * @param string $html page source
 * @return string|null the title with whitespace collapsed, or null if absent
 */
function findlinks_extract_title($html)
{
    // "s" lets the title span several lines; "i" matches <TITLE> as well.
    if (preg_match('/<title\b[^>]*>(.*?)<\/title>/is', $html, $found)) {
        return trim(preg_replace('/\s+/', ' ', $found[1]));
    }
    return null;
}

/**
 * Resolve a link found in a page against the location of that page.
 *
 * Handles absolute URLs (any scheme), protocol-relative ("//host/x"),
 * root-relative ("/x"), query-only ("?x"), fragment-only ("#x") and
 * document-relative ("x.html") links. "." and ".." segments are not
 * normalised.
 *
 * @param string $href link exactly as written in the page
 * @param string $base location the page was loaded from
 * @return string resolved link
 */
function findlinks_resolve_url($href, $base)
{
    if ($href === '') {
        return $base;
    }
    // Anything carrying its own scheme (http:, https:, mailto:, ...) is final.
    if (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $href)) {
        return $href;
    }

    $parts = parse_url($base);
    if ($parts === false) {
        $parts = [];
    }

    // scheme://host[:port] of the page; stays empty for plain file paths.
    $origin = '';
    if (isset($parts['scheme'], $parts['host'])) {
        $origin = $parts['scheme'] . '://' . $parts['host'];
        if (isset($parts['port'])) {
            $origin .= ':' . $parts['port'];
        }
    }

    if (strncmp($href, '//', 2) === 0) {
        return (isset($parts['scheme']) ? $parts['scheme'] : 'http') . ':' . $href;
    }
    if ($href[0] === '/') {
        return $origin . $href;
    }

    $path = isset($parts['path']) ? $parts['path'] : '/';
    if ($href[0] === '?') {
        return $origin . $path . $href;
    }
    if ($href[0] === '#') {
        $query = isset($parts['query']) ? '?' . $parts['query'] : '';
        return $origin . $path . $query . $href;
    }

    // Document-relative: keep the page's directory, replace the file name.
    $lastSlash = strrpos($path, '/');
    $directory = ($lastSlash === false) ? '' : substr($path, 0, $lastSlash + 1);
    return $origin . $directory . $href;
}

/**
 * Collect the target of every <a ... href=...> in a page.
 *
 * @param string $html page source
 * @param string $base location the page was loaded from (for relative links)
 * @return array list of resolved link targets, in document order
 */
function findlinks_extract_links($html, $base)
{
    // href may be double-quoted, single-quoted or bare, and need not be the
    // first attribute; "\s" before it avoids matching e.g. data-href.
    $pattern = '/<a\b[^>]*?\shref\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))/i';
    $links = [];
    if (!preg_match_all($pattern, $html, $anchors, PREG_SET_ORDER)) {
        return $links;
    }
    foreach ($anchors as $anchor) {
        // Exactly one of the three capture groups holds the value.
        $href = '';
        for ($group = 1; $group <= 3; $group++) {
            if (isset($anchor[$group]) && $anchor[$group] !== '') {
                $href = $anchor[$group];
                break;
            }
        }
        // Attribute values are entity-encoded in the source (&amp; etc.).
        $href = trim(html_entity_decode($href, ENT_QUOTES, 'UTF-8'));
        if ($href === '') {
            continue;
        }
        $links[] = findlinks_resolve_url($href, $base);
    }
    return $links;
}

// $PHP_SELF only exists with register_globals; read it from $_SERVER instead.
$self = isset($_SERVER['PHP_SELF']) ? $_SERVER['PHP_SELF'] : '';

echo "<HTML><HEAD><TITLE>findlinks.php</TITLE></HEAD> \n";
echo "<BODY bgcolor=\"#336699\" text=\"#000000\" link=\"#416D96\" vlink=\"#414141\" alink=\"#DDCEA2\"> \n";
echo "<form method=post action=\"" . findlinks_escape($self) . "\"> \n";
echo "<p><table align=\"absmiddle\" width=\"100%\" bgcolor=\"#ededed\" name=\"tablesiteopen\" border=\"0\">\n";
echo "<tr><td align=left>";
if (!empty($_POST['FindLinks'])) {
    // Form submitted: validate the input, then fetch and summarise the page.
    $urlname = (isset($_POST['urlname']) && is_string($_POST['urlname']))
        ? trim($_POST['urlname'])
        : '';
    if ($urlname === '') {
        echo "Please enter a urlname. <br>\n";
    } else {
        $content = findlinks_fetch($urlname);
        if ($content !== false) {
            $page_title = findlinks_extract_title($content);
            $links = findlinks_extract_links($content, $urlname);

            echo "<p><b>Page Summary</b><br>\n";
            echo "<b>Url:</b> " . findlinks_escape($urlname) . "<br>\n";
            if ($page_title !== null) {
                echo "<b>Title:</b> " . findlinks_escape($page_title) . "<br>\n";
            } else {
                echo "No title found<br>\n";
            }
            echo "<b>Links:</b><br>\n";
            if ($links) {
                foreach ($links as $link) {
                    echo "Link: " . findlinks_escape($link) . "<br>\n";
                }
            } else {
                echo "No links found<br>\n";
            }
            echo "End</p>\n";
        } else {
            echo "<br>The url " . findlinks_escape($urlname) . " does not exist or there was an fopen error.<br>";
        }
        echo "Press the back button to try again.<br>";
    }
} else {
    // First visit: show the input form with a sample URL pre-filled.
    $urlname = "http://www.google.com/intl/en/about.html"; // or whatever page you like
    echo "<p><b>findlinks.php example</b><br>\n";
    echo "File or URL: <input type=\"TEXT\" name=\"urlname\" value=\"" . findlinks_escape($urlname) . "\" maxlength=\"255\" size=\"80\"><br>\n";
    echo "<input type=\"SUBMIT\" name=\"FindLinks\" value=\"Find\"><br></p> \n";
}
echo "</td></tr>";
echo "</table></p>";
echo "</form></BODY></HTML>\n";
?> | | |
|
| Check for functional file links (broken Files)
Categories : PHP, Data Validation, FTP, Regexps, Arrays | | | columned txt file to array()? Categories : Arrays, Strings, Regexps, PHP | | | Link Extractor - This function is used to extract links from a given URL. This will convert relative path into absolute path and also remove PHPSESSID stuff. Categories : PHP, URLs, Regexps | | | BBCode Formatting String Categories : PHP, HTML, Regexps, Arrays | | | grab the result of any calculation you submit to the Google Calculator. Categories : PHP, Arrays, Web Services, Regexps, Math. | | | Tag content retrieval from websites with preg_match Categories : PHP, Regexps, Arrays, HTML and PHP | | | Pageinfo: Array containing page URI, page query string (parameters), request method (GET or POST) and the complete URI Categories : Variables, PHP Options and Info, Arrays, URLs, PHP | | | Variable serialization and unserialization. Loading and saving variable structures
to and from file. Categories : Arrays, Filesystem, Variables, Strings, PHP | | | Extract keywords from a string having words in " " count as one string. Categories : PHP, Strings, Regexps, Search | | | Simple way of scaling any image to fit either given width or height. Categories : PHP, Graphics, Arrays | | | Get the self URL of current page Categories : PHP, URLs, Beginner Guides | | | PHP Random RSS feeds - selects 49 random feeds from an unlimited list and displays them on your website. It's ideal for those moments when you've got 5 minutes and don't know which one of your feeds to read. Categories : PHP, Rich Site Summary (RSS), Arrays | | | WWW interface to Unix Manual (phpMan)
Categories : Program Execution, Strings, Arrays, PHP | | | Tweak Array, insert/add elements to any position of your arrays - delete elements from your arrays - move elements within your arrays - replace elements from your arrays ... the array, 'dynamically' grows or shrinks to whatever we tweak it. Categories : PHP Classes, Arrays, PHP | | | A recursive function to traverse a multi-dimensional array where the
dimensions are not known Categories : Arrays, PHP, Algorithms | |
|
|