#########################################################################
# imageGrabber.php v1.0 #
# ----------- #
# Copyright (C) 2005 Aristidis Karidis, aris.karidis@bcs.org #
# ---------------------------------------------------------- #
# This function grabs the images from one or more URLs and saves them #
# to a specified local directory. #
# #
#########################################################################
# #
# This program is free software; you can redistribute it and/or #
# modify it under the terms of the GNU General Public License #
# as published by the Free Software Foundation; either version 2 #
# of the License, or (at your option) any later version. #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# ------------------------------------ #
# http://www.gnu.org/copyleft/gpl.html #
#########################################################################
/**
* This function mines the image URLs from one or more webpages, returning an array of links.
*
* @param string $url
* @param int $unique
* @return array
*/
function imageGrabber($url, $unique = 1)
{
$startTag = '<img';
$srcTag = 'src=';
$endTag = '>';
$counter = 0;
if(!is_array($url))
{
$url = array($url);
}
if ($unique !== 0 && $unique !== 1)
{
printf('Invalid parameter for $unique. The parameter must be either 1 or 0.');
exit();
}
foreach ($url as $value)
{
$contents = file_get_contents($value);
if ($pos)
{
$domain = substr($domain, 0, stripos($domain, '/'));
}
while ($contents)
{
set_time_limit(0); # In case we have several large pages
$quotes = array('"', "'", "\n");
$contents = str_replace($quotes, '', $contents); # Strip " and ' as well as \n from input string
$contents = stristr($contents, $startTag); # Drop everything before the start tag '<img'
$contents = stristr($contents, $srcTag); # Drop everything before the 'src'
$endTagPosition = stripos($contents, $endTag); # Position of the end tag '>'
$src = substr($contents, 4, $endTagPosition - 4); # Get everything from src to end tag --> 'src="path" something>'
$spacePosition = stripos($src, ' '); # Position of space (if it exists)
if ($spacePosition !== false)
{
$src = substr($src, 0, $spacePosition); # Drop everything after space, keeping 'src="path"'
}
$questionMarkPosition = stripos($src, '?');
if ($questionMarkPosition !== false)
{
$src = substr($src, 0, $questionMarkPosition); # Remove any part after a '?'
}
$contents = stristr($contents, $endTag); # Drop everything before the end tag '>'
if ($src)
{
if (stripos($src, '/') === 0)
{
$src = 'http://'.$domain.$src; # Relative link, so add domain before '/'
}
else
{
if (stripos($src, 'http://') !== 0 && stripos($src, 'https://') !== 0 && stripos($src, 'ftp://') !== 0)
{
$src = 'http://'.$domain.'/'.$src; # Relative link, so add domain and '/'
}
}
$paths[] = $src;
}
}
if ($unique === 1)
{
$results[] = array_unique($paths); # Create final array with unique $paths
}
else
{
$results[] = $paths; # Create final array with all $paths
}
/**
* This function will download and save all images to the specified directory.
*
* @param array $results
* @param string $localPath
*/
function saveImages($results, $localPath = 'C:\\Temp\\test\\')
{
foreach ($results as $v)
{
foreach ($v as $value)
{
set_time_limit(0);
$path = $value;
if (!file_exists($localPath))
{
mkdir($localPath); # Create the dir if it doesn't exist
}