Commit aaff4ad6 authored by Cerdic

il faut curl toutes les URLs et ne garder que celles qui sont bien en 2xx

parent 19a2b365
[(#SET{erreur,<:offline:erreur_contenu_non_disponible:>})
][(#SET{url_base,#VAL{./}|url_absolue})
][(#SET{url_base,#REM|url_de_base})
][(#INCLURE{fond=404,status=404,erreur=#GET{erreur}}|offline_inserer_head{
[<base href="(#GET{url_base})" />
<style>
......
......@@ -259,12 +259,7 @@ function offline_ressources_from_url($url, $force_refresh = false, $profondeur =
return array();
}
$ressources = array(
$url
);
if ($profondeur<=0) {
return $ressources;
}
$ressources = array();
// trouver les ressources de l'URL si c'est un contenu qu'on sait parser
$parts = parse_url($url);
......@@ -276,37 +271,46 @@ function offline_ressources_from_url($url, $force_refresh = false, $profondeur =
}
$offline_ressources_from_parse = false;
if ($extension == 'php'
or $offline_ressources_from_parse = charger_fonction('offline_ressources_from_parse_' . $extension, 'inc', true)) {
//var_dump("$profondeur:$url", $offline_ressources_from_parse);
$delai_cache = 86400;
if ($force_refresh) {
$delai_cache = 0;
}
include_spip('inc/distant');
$res = recuperer_url_cache($url, array('delai_cache' => $delai_cache));
if ($extension == 'php') {
if ($res and isset($res['headers'])) {
if (preg_match(',Content-Type:\s*(\w+/\w+),', $res['headers'], $m)
and $mime = $m[1]
and $extension = sql_getfetsel('extension', 'spip_types_documents', 'mime_type=' . sql_quote($mime))) {
}
}
// en dernier recours si ca ressemble a du html essayons !
if ($extension === 'php'
and isset($res['page'])
and strpos($res['page'], "<!DOCTYPE") !== false
and strpos($res['page'], "<html") !== false
and strpos($res['page'], "</html>") !== false) {
$extension = 'html';
}
if ($extension !== 'php') {
$offline_ressources_from_parse = charger_fonction('offline_ressources_from_parse_' . $extension, 'inc', true);
//var_dump("$profondeur:$url", $offline_ressources_from_parse);
$delai_cache = 86400;
if ($force_refresh) {
$delai_cache = 0;
}
include_spip('inc/distant');
$res = recuperer_url_cache($url, array('delai_cache' => $delai_cache));
#echo "$url : " . $res['status'] . "\n";
if (!$res or !isset($res['status']) or $res['status']>=300) {
return $ressources;
}
$ressources[] = $url;
if ($profondeur<=0) {
return $ressources;
}
// parser le contenu si on sait faire
// si c'est un spip.php on regarde le content-type pour savoir le type final
// si on trouve pas on essaye de reconnaitre du html...
if ($extension == 'php'){
if ($res and isset($res['headers'])){
if (preg_match(',Content-Type:\s*(\w+/\w+),', $res['headers'], $m)
and $mime = $m[1]
and $extension = sql_getfetsel('extension', 'spip_types_documents', 'mime_type=' . sql_quote($mime))){
}
}
// en dernier recours si ca ressemble a du html essayons !
if ($extension==='php'
and isset($res['page'])
and strpos($res['page'], "<!DOCTYPE")!==false
and strpos($res['page'], "<html")!==false
and strpos($res['page'], "</html>")!==false){
$extension = 'html';
}
}
$offline_ressources_from_parse = charger_fonction('offline_ressources_from_parse_' . $extension, 'inc', true);
if ($offline_ressources_from_parse and $res and $res['page']) {
$sub_ressources = $offline_ressources_from_parse($res['page'], $url);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment