#!/usr/bin/perl -w # tree.pl. Creates a structured HTML list ('sitemap') of files. # Copyright (C) 1998-2000 Daniel Naber # version 1.18, 2000-05-10 (version number is independent from java version) # Latest version: http://www.ev-stift-gymn.guetersloh.de/server/tree_e.html # # HOW TO USE THIS: # 1. On Unix, check if the path to perl in the very first line is correct. # 2. Go to the configuration section and adapt the values. Note that # the most important values are at the top, so if something # fails the error is probably there. # 3. On Unix, set execute permission: chmod u+x tree.pl # 4. Call the script from command line: ./tree.pl [htmldir] >output.html # or call it as CGI (set $cgi = 1): http://yourdomain/cgi-bin/tree.pl # (if it doesn't work as CGI, see: # http://www.perl.com/CPAN-local/doc/FAQs/cgi/idiots-guide.html) # # TODO/BUGS/PROBLEMS: # -links beneath $htmldir will be ignored # -due to stupid program design, you cannot make a tree that only # consists of files given in @indexfiles # -html output has a small flaw: , i.e. an "empty list", # as weblint complaints - most browsers don´t care # -ignores meta tags for robots # # CHANGES (important ones): # 1997-09-07: first version # 1999-08-29: added $filetag_end, thanks to Doug Melton; # small html cleanup # 1999-09-15: put out more $filetag_end, thanks # to Georg Bischof # 1999-12-03: trailing slash now auotmatically chopped off, thanks to # Mark Williams # 2000-02-03: documentation update; functions now have prototypes # and are called without the "&"; more sourcecode documentation; # errors go to STDERR again # 2000-04-04: print out warning if $htmldir isn't an absolute path; # getdate() now uses a more "modern" way to get the date # 2000-05-10: files without are now also linked (only if # $listwithouttitle = 1, thanks to SKiNNY <skinny1@xs4all.nl>); # added BASE documentation to tree-template.html # # COPYRIGHT: # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # ================================================================================================= # === user-configurable options =================================================================== # ================================================================================================= if( $ARGV[0] ) { $htmldir = $ARGV[0]; } else { # directory with the html files (may be overriden by command line argument) # on Windows: use the slash (/) instead of double backslash (\\) $htmldir = "/home/sanyaro"; } # take this file as a template to build the output page (try to use an absolute path # if the script fails, escpecially as CGI): $templatefile = "/home/sanyaro/maper/temp.html"; # comment in this line and you'll just get the list: #$templatefile = "sitemap"; $cgi = 1; # use this as a cgi script? (0=no, 1=yes) $baseurl = ""; # this will be the first part of any URL (normally you can leave it empty) @indexfiles = ('index.html', 'index.shtml'); # default-files' names @inchtml = ('shtml', 'html', 'htm'); # take files with these suffixes as HTML files @incpics = (); # include pictures with these suffixes #@incpics = (); # don't include pictures $listsize = 0; # include size in kb for every file? (0=no, 1=yes) # set the following options both to 1 to generate a list you can use offline: $offline = 0; # make links offline-enabled (use from local disk) (0=no, 1=yes) $indexrefs = 0; # make links to 'dir/index.html' etc. (instead of 'dir/') (0=no, 1=yes) # do only include these files/directories, use '*' as a wildcard, # use '@includefiles = ();' to include all files except those in @excludefiles: @includefiles = ('/tour/*','/climb/*'); # do not include these files/directories: @excludefiles = ('/tour/etour/news/*','/climb/cnews/*'); # include html files without <title>... resp. with empty title? (0=no, 1=yes): $listwithouttitle = 0; # output file. Set path relative to $htmldir. This is only so that the generated list # won't have a link to itself: $self = "/maper/temp.html"; $selftitle = "»çÀÌÆ® ¸Ê"; # how to call this script's output in the sitemap $date = "YEAR-MONTH-DAY"; # date according to ISO 8601 #$date = "DAY.MONTH.YEAR"; # german format #$date = "MONTH/DAY/YEAR"; # american format # for those of you who like the plain output: $dirtag = '
    '; $dirtag_end = '
'; $foldertag = '
  • '; $htmltag = '
  • '; $pictag = '
  • '; $nolinktag = '
  • '; $filetag_end = '
  • '; # for those who like output with an icon in front of every item # (you better also set the width and height): #$dirtag = '
    '; #$dirtag_end = '
    '; #$foldertag = '
    folder'; #$htmltag = '
    html file'; #$pictag = '
    picture'; #$nolinktag = '
    other file'; #$filetag_end = '
    '; # how to mark files that changed not long ago: $modifiedtag = 'new '; $modifiedtime = '10*24'; # mark files that are not older than $modifiedtime hours (0 = option off) #$modifiedtime =0; # ================================================================================================= # === no configuration below ====================================================================== # ================================================================================================= # the prototypes: sub getdate(); sub init(); sub first_part_output(); sub doperfile(); sub list_output(); sub last_part_output(); sub getdefaultfile($); sub getinfo($$); sub is_it_modified($); sub getsize($); sub isfileclass($@); sub isfile($@); sub load_part($); sub load($); sub minzero($); # $st = time(); # comment in if you're interested in runtime use File::Find; $depth = 0; ($htmlct, $htmlsize, $picct, $picsize) = (0, 0, 0, 0); # count size und number $partlist = ""; getdate(); init(); first_part_output(); find(\&doperfile, $htmldir); list_output(); last_part_output(); # $diff = time() - $st; print STDERR "time: $diff secs\n"; # see above exit; # -------------------------------------------------------------------------- # Sets the global variable $date to the date that's now. sub getdate() { use Time::localtime; my $year = localtime->year() + 1900; my $mon = localtime->mon() + 1; my $day = localtime->mday(); $mon = "0".$mon if( length($mon) == 1 ); $day = "0".$day if( length($day) == 1 ); $date =~ s#YEAR#$year#i; $date =~ s#MONTH#$mon#i; $date =~ s#DAY#$day#i; } # Does some checks and notifies the user of problems. sub init() { if( $cgi ) { select(STDOUT); $| = 1; $nph = 1 if( $0 =~ m#nph-tree# ); print "HTTP/1.0 200 OK\n" if( $nph ); print "Content-Type: text/html\n\n"; } if( ! -d $htmldir ) { die "Error: $0: '$htmldir' does not exist, is not a directory or is not readable.\n"; } if( -l $htmldir ) { die "Error: $0: cannot start on a symbolic link (symbolic links\n below the html directory will be ignored anyway): '$htmldir'.\n"; exit; } if( $htmldir !~ m#^/# ) { print STDERR "Warning: $0: Try setting \$htmldir to an absolute path for better results.\n"; } if( $htmldir =~ m#/$# ) { # possibly remove trailing slash chop($htmldir); } my $expat; # enable '*' as wildcard in @excludefiles foreach $expat (@excludefiles) { $expat =~ s#\*#.*?#g; } foreach $expat (@includefiles) { # the same in @includefiles $expat =~ s#\*#.*?#g; } } # Print out the first part of the list. Loads template file, substitutes date from the template. sub first_part_output() { if( $templatefile ) { $output = load($templatefile); } else { $output = ""; } $output =~ s##$date#igs; my ($first_part) = ($output =~ m#^(.*?)#is); $first_part = "" if ( ! defined($first_part) ); # avoid warning print $first_part; } # This is the function connected to 'find', i.e. it's called once for every file in the tree. sub doperfile() { my $thisfile = $File::Find::name; # the filename that Find provides for us $thisfile .= "/" if( -d $thisfile ); my ($thisfile_rel) = ($thisfile =~ m#^$htmldir(/.*)#); # part after $htmldir if( ! $thisfile_rel ) { # works better with relative path in $htmldir $thisfile_rel = "."; } my $expat; # include only files from @includefiles: if( scalar(@includefiles) >= 1 ) { my $do_use = 0; foreach $expat (@includefiles) { if( $thisfile_rel =~ m#^$expat$# ) { $do_use = 1; last; } } return if( ! $do_use ); } # exclude files from @excludefiles: foreach $expat (@excludefiles) { return if( $thisfile_rel =~ m#^$expat$# ); } if( ! isfile($thisfile, @indexfiles) && ($thisfile =~ m#/$# || isfileclass($thisfile, @inchtml) || isfileclass($thisfile, @incpics)) ) { push(@filelist, $thisfile); } } # Make the list. sub list_output() { my $thisfile; my $dirsdone = ""; # have we been here already? my $thisdir = ""; my $dirtag_ct = 0; @filelist = sort(@filelist); print "$dirtag\n"; $dirtag_ct++; # This is the loop which generated the nested list: foreach $thisfile (@filelist) { ($url) = ($thisfile =~ m#$htmldir(/.*)#i); $olddepth = $depth; $depth = ($url =~ s#/#/#gi); # 1 = html-root $olddir = $thisdir; ($thisdir) = ($url =~ m#(.*/).*?#i); if( $thisdir ne $olddir && ! ($dirsdone =~ m#^\Q$thisdir\E$#m) ) { # deeper level or same level $dirsdone .= "$thisdir\n"; $partlist .= "$dirtag_end\n" x ($olddepth-$depth+1); $dirtag_ct -= minzero($olddepth-$depth+1); $partlist .= " $nolinktag$baseurl$url$filetag_end\n" if( ! getdefaultfile("$htmldir$thisdir") ); $partlist .= "$dirtag\n"; $dirtag_ct++; } elsif( ! ($thisdir =~ m#\Q$olddir\E#i) ) { # higher level $partlist .= "$dirtag_end\n" x ($olddepth-$depth); $dirtag_ct -= minzero($olddepth-$depth); $partlist .= getinfo($thisfile, 0); } else { # same level as before $partlist .= getinfo($thisfile, 0); $partlist =~ s#$dirtag\n$dirtag_end\n##ig; # clean up HTML print $partlist; $partlist = ""; } } print $partlist; # close list correctly: print "$dirtag_end\n" x $dirtag_ct; } # Prints the last part of the list. Substitutes some strings that might be in the template. sub last_part_output() { $htmlsize = int($htmlsize/1000); # size in kB $picsize = int($picsize/1000); $output =~ s##$htmlct#igs; $output =~ s##$htmlsize#igs; $output =~ s##$picct#igs; $output =~ s##$picsize#igs; my ($last_part) = ($output =~ m#(.*)$#is); $last_part = "" if ( ! defined($last_part) ); # avoid warning print $last_part; } # -------------------------------------------------------------------------- # Get default file (e.g. index.html) for a directory. Return 0 if there is no default file. sub getdefaultfile($) { my $dir = shift; my $item; foreach $item (@indexfiles) { if( -e "$dir$item" ) { # there's a defaultfile $partlist .= getinfo("$dir$item", 1); return $item; } } return 0; } # Get the string that will make up a file's entry in the list. sub getinfo($$) { my $thisfile = shift; my $isindexfile = shift; my ($suffix) = ($thisfile =~ m#.*\.(.*)#); my ($size, $exactsize) = getsize($thisfile); my $entry = ""; my $linkurl; $offline ? ($linkurl = $htmldir.$url) : ($linkurl = $url); $linkurl = $baseurl.$linkurl; if( isfileclass($thisfile, @inchtml) ) { $htmlsize += $exactsize; $htmlct++; my $string = load_part($thisfile); my $icontag = " $htmltag"; $icontag = " $foldertag" if( $isindexfile ); if( $thisfile eq "$htmldir$self" ) { # output file itself $entry .= " $nolinktag$selftitle$filetag_end"; } else { my ($titlestring) = ($string =~ m#(.*?)#is); # common case if( $listwithouttitle && (! $titlestring || $titlestring =~ m/^\s*$/) ) { $titlestring = $baseurl.$url; } if( $titlestring && $titlestring !~ m/^\s*$/ ) { $entry .= $icontag; $entry .= $modifiedtag if( is_it_modified($thisfile) ); $entry .= "$titlestring"; $entry .= " ($size kB)" if( $listsize ); $entry .= "$filetag_end"; } } $entry .= "\n"; } elsif( isfileclass($thisfile, @incpics) ) { $picsize += $exactsize; $picct++; my ($filenameonly) = ($url =~ m#.*/(.*)#i); $entry .= " $pictag"; $entry .= $modifiedtag if( is_it_modified($thisfile) ); $entry .= "$filenameonly"; $entry .= " ($size kB)" if( $listsize ); $entry .= "$filetag_end\n"; } # links to dir/ or to dir/index.html (see configuration section) if( $indexrefs ) { my ($filepart) = ($thisfile =~ m#.*/(.*)#); $entry =~ s#href="(.*?/)"#href="$1$filepart"#i; } return $entry; } # Check if file was modified recently. sub is_it_modified($) { my $filename = shift; ($mtime) = (stat($filename))[9]; if( $modifiedtime && ((time() - $mtime) < ($modifiedtime*60*60)) ) { return 1; } else { return 0; } } # Get file's size in (kB, bytes). sub getsize($) { my $file = shift; my $exactsize = -s ($file); my $size = int($exactsize/1000); $size = 1 if( $size == 0 ); return $size, $exactsize; } # Check if file's suffix is in an array of suffixes. sub isfileclass($@) { my $file = shift; my @fileclass = @_; my $item; foreach $item (@fileclass) { return 1 if( $file =~ m#\.$item$# ); } return 0; } # Check filename, e.g. if it's an index.html-like file. sub isfile($@) { my $file = shift; my @files = @_; my $item; foreach $item (@files) { return 1 if( $file =~ m#/$item$# ); } return 0; } # Only load part of a file till is reached. sub load_part($) { my $file = shift; my $string = ""; open(INPUT, "<$file") || die "Error: $0: Cannot open '$file': $!"; while() { $string .= $_; last if( $_ =~ m##i ); } close(INPUT); $string = "" if( ! defined($string) && $file =~ m#^$htmldir/$self$# ); # avoid warning return $string; } # Load a file. Takes filename, returns file's contents. sub load($) { my $file = shift; my $string; open(INPUT, "<$file") || die "Error: $0: Cannot open '$file': $!"; undef $/; $string = (); $/ = "\n"; close(INPUT); return $string; } # Returns 0 if argument is < 0, returns the argument otherwise. sub minzero($) { my $var = shift; if( $var > 0 ) { return $var; } else { return 0; } }