|
|
|
|
|
|
| |
find url links in a page
| <?php
// findlinks.php
// php code example: find links in an html page
// mallsop.com 2006 gpl
//
// Renders a small form asking for a file name or URL. When the form is
// submitted, the target is read, its <title> and every <a href=...> are
// extracted, relative links are made absolute against the submitted name,
// and a summary is printed. All dynamic text is HTML-escaped on output.

/**
 * Escape a value for inclusion in HTML text or a quoted attribute.
 *
 * ENT_SUBSTITUTE keeps pages that are not valid UTF-8 from collapsing to an
 * empty string.
 *
 * @param string $text Raw text.
 * @return string HTML-escaped text.
 */
function findlinks_escape($text)
{
    return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

/**
 * Return the text of the first <title> element, or null when there is none.
 *
 * The match is case-insensitive and may span several lines; runs of
 * whitespace are collapsed to a single space.
 *
 * @param string $html Page markup.
 * @return string|null Decoded title text, or null if no <title> was found.
 */
function findlinks_extract_title($html)
{
    if (!preg_match('/<title\b[^>]*>(.*?)<\/title>/is', $html, $match)) {
        return null;
    }
    $title = html_entity_decode($match[1], ENT_QUOTES, 'UTF-8');

    return trim(preg_replace('/\s+/', ' ', $title));
}

/**
 * Turn an href found in a page into an absolute location.
 *
 * Handles absolute URLs (any scheme), protocol-relative ("//host/x"),
 * root-relative ("/x"), query-only ("?x"), fragment-only ("#x") and
 * directory-relative ("x") references. "." and ".." segments are kept
 * as written, not collapsed.
 *
 * @param string $base The page the href was found in (URL or local path).
 * @param string $href The href value, already entity-decoded.
 * @return string The resolved location.
 */
function findlinks_resolve_url($base, $href)
{
    if ($href === '') {
        return $base;
    }
    // Anything that starts with "scheme:" (http:, https:, mailto:, ...) is
    // already absolute.
    if (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $href)) {
        return $href;
    }
    if ($href[0] === '#') {
        return preg_replace('/#.*$/s', '', $base) . $href;
    }

    // Base without its own query string / fragment.
    $bare = preg_replace('/[?#].*$/s', '', $base);
    if ($href[0] === '?') {
        return $bare . $href;
    }

    $parts = parse_url($bare);
    if (is_array($parts) && isset($parts['scheme'], $parts['host'])) {
        if (strncmp($href, '//', 2) === 0) {
            return $parts['scheme'] . ':' . $href;
        }
        $origin = $parts['scheme'] . '://' . $parts['host'];
        if (isset($parts['port'])) {
            $origin .= ':' . $parts['port'];
        }
        if ($href[0] === '/') {
            return $origin . $href;
        }
        // With a host present the path always starts with "/", so strrpos()
        // cannot return false here.
        $path = isset($parts['path']) ? $parts['path'] : '/';

        return $origin . substr($path, 0, strrpos($path, '/') + 1) . $href;
    }

    // Base is a local path: root-relative hrefs stand alone, everything else
    // is relative to the directory containing the base file.
    if ($href[0] === '/') {
        return $href;
    }
    $slash = strrpos($bare, '/');

    return ($slash === false ? '' : substr($bare, 0, $slash + 1)) . $href;
}

/**
 * Collect the targets of all <a ... href=...> tags in a page.
 *
 * Accepts double-quoted, single-quoted and unquoted href values, any number
 * of links per line, and other attributes before href. Empty hrefs are
 * skipped.
 *
 * @param string $html Page markup.
 * @param string $base Location the page was read from, used to resolve
 *                     relative links.
 * @return array List of resolved link targets in document order.
 */
function findlinks_extract_links($html, $base)
{
    $pattern = '/<a\b[^>]*?\shref\s*=\s*(?|"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))/i';
    $links = [];
    if (!preg_match_all($pattern, $html, $matches, PREG_SET_ORDER)) {
        return $links;
    }
    foreach ($matches as $match) {
        $href = trim(html_entity_decode($match[1], ENT_QUOTES, 'UTF-8'));
        if ($href !== '') {
            $links[] = findlinks_resolve_url($base, $href);
        }
    }

    return $links;
}

// $PHP_SELF only exists with register_globals; read it from $_SERVER and
// escape it because the path portion is client-controlled.
$formAction = findlinks_escape(isset($_SERVER['PHP_SELF']) ? $_SERVER['PHP_SELF'] : '');

echo "<HTML><HEAD><TITLE>findlinks.php</TITLE></HEAD> \n";
echo "<BODY bgcolor=\"#336699\" text=\"#000000\" link=\"#416D96\" vlink=\"#414141\" alink=\"#DDCEA2\"> \n";
echo "<form method=post action=\"" . $formAction . "\"> \n";
echo "<p><table align=\"absmiddle\" width=\"100%\" bgcolor=\"#ededed\" name=\"tablesiteopen\" border=\"0\">\n";
echo "<tr><td align=left>";

if (!empty($_POST['FindLinks'])) {
    // A non-string value (e.g. urlname[]=x) is treated as "nothing entered".
    $urlname = (isset($_POST['urlname']) && is_string($_POST['urlname']))
        ? trim($_POST['urlname'])
        : '';

    if ($urlname === '') {
        echo "Please enter a urlname. <br>\n";
    } else {
        // NOTE(review): $urlname comes straight from the form and is handed to
        // fopen(), so it can name a local file or any enabled stream wrapper.
        // The form advertises "File or URL", so this is left as is; restrict
        // it to http/https before exposing the script to untrusted users.
        // The @ keeps fopen()'s warning out of the page; failure is reported
        // below.
        $handle = @fopen($urlname, 'r');
        if ($handle !== false) {
            $content = stream_get_contents($handle);
            fclose($handle);
            if ($content === false) {
                $content = '';
            }

            $pageTitle = findlinks_extract_title($content);
            $links = findlinks_extract_links($content, $urlname);

            echo "<p><b>Page Summary</b><br>\n";
            echo "<b>Url:</b> " . findlinks_escape($urlname) . "<br>\n";
            if ($pageTitle !== null) {
                echo "<b>Title:</b> " . findlinks_escape($pageTitle) . "<br>\n";
            } else {
                echo "No title found<br>\n";
            }
            echo "<b>Links:</b><br>\n";
            if ($links) {
                foreach ($links as $link) {
                    echo "Link: " . findlinks_escape($link) . "<br>\n";
                }
            } else {
                echo "No links found<br>\n";
            }
            echo "End</p>\n";
        } else {
            echo "<br>The url " . findlinks_escape($urlname) . " does not exist or there was an fopen error.<br>";
        }
        echo "Press the back button to try again.<br>";
    }
} else {
    $urlname = "http://www.google.com/intl/en/about.html"; // or whatever page you like
    echo "<p><b>findlinks.php example</b><br>\n";
    echo "File or URL: <input type=\"TEXT\" name=\"urlname\" value=\"" . findlinks_escape($urlname) . "\" maxlength=\"255\" size=\"80\"><br>\n";
    echo "<input type=\"SUBMIT\" name=\"FindLinks\" value=\"Find\"><br></p> \n";
}
echo "</td></tr>";
echo "</table></p>";
echo "</form></BODY></HTML>\n";
?> | | |
|
| Tag content retrieval from websites with preg_match Categories : PHP, Regexps, Arrays, HTML and PHP | | | Check for functional file links (broken Files)
Categories : PHP, Data Validation, FTP, Regexps, Arrays | | | columned txt file to array()? Categories : Arrays, Strings, Regexps, PHP | | | Link Extractor - This function is used to extract links from a given URL. This will convert relative path into absolute path and also remove PHPSESSID stuff. Categories : PHP, URLs, Regexps | | | Pageinfo: Array containing page URI, page query string (parameters), request method (GET or POST) and the complete URI Categories : Variables, PHP Options and Info, Arrays, URLs, PHP | | | BBCode Formatting String Categories : PHP, HTML, Regexps, Arrays | | | Check if a file exists on a remote FTP server with PHP Categories : PHP, FTP, Regexps | | | Array values from javascript to php Categories : PHP, Java Script, Arrays | | | clearing variables in php3 Categories : Variables, Arrays, PHP | | | This script is a contact form between users of a
website (kinda like the PM function on the forums)
Categories : PHP, Databases, MySQL, Regexps | | | Weighted Random - Random scripts usually choose one item out of several, and each item has an equal chance of being chosen. But what if you want one item to be chosen more frequently than the others? Categories : PHP, Math., Arrays | | | php jump urls...the best way Categories : PHP, URLs, Filesystem | | | Simple way to replace a variable value in a .conf (.ini) file using a
webbrowser - the first stage of a complete universal configuration editor Categories : PHP, Regexps, Code Editors, Filesystem | | | Display list of files within current and subdirectories (recursively) showing
each file as an anchored link and each directory as a category header. Categories : Filesystem, Directories, Arrays, PHP | | | ereg -- Regular expression match Categories : PHP, PHP Functions, Regexps | |
|
|
|