author    | Solomon Peachy <pizza@shaftnet.org> | 2013-08-23 22:19:55 -0400
committer | Solomon Peachy <pizza@shaftnet.org> | 2013-08-23 22:19:55 -0400
commit    | 001c5dac095d31fba4ba545ed3902e2ab026792a (patch)
tree      | 51f3682f647dc04797106369a57f78ea0a27d188
parent    | 14758322e8fbffe964a1003da03dad39e4183edd (diff)
work-in-progress changes to the nb2ikiwiki script.
-rw-r--r-- | nb2ikiwiki.pl | 192
1 file changed, 192 insertions, 0 deletions
diff --git a/nb2ikiwiki.pl b/nb2ikiwiki.pl
new file mode 100644
index 0000000..80766c0
--- /dev/null
+++ b/nb2ikiwiki.pl
@@ -0,0 +1,192 @@
#!/usr/bin/perl
#
# nb2ikiwiki --- a conversion script from NanoBlogger to ikiwiki
#
# Released under the HOT-BEVERAGE-OF-MY-CHOICE LICENSE: Bastian Rieck wrote
# this script. As long you retain this notice, you can do whatever you want
# with it. If we meet some day, and you feel like it, you can buy me a hot
# beverage of my choice in return.

use strict;
use warnings;

use HTML::WikiConverter;
use HTML::WikiConverter::Markdown;
use Date::Manip;

my $input_directory = "/home/pizza/nanoblogger-3.5-rc1/pizza/data";
opendir(IN, $input_directory) or die "Unable to open input directory";

my @files = readdir(IN);

# Identify database files and store tags for the filenames; the tags are
# forced to lowercase.

my %tags = ();
foreach my $file (@files)
{
    if($file =~ m/\.db$/i && !($file eq "master.db"))
    {
        open(DB, $input_directory . "/" . $file) or die "Unable to open database file";

        my $category = lc(<DB>); # Category is always the first line of the file
        chomp($category);
        $category =~ tr/ /_/;

        foreach my $article (<DB>)
        {
            # Ignore assignments of multiple tags, i.e. foo.txt>1,3.
            # Only the filename is required; skip lines that don't carry one.
            next unless $article =~ m/(.*\.txt)/;

            if(exists($tags{$1}))
            {
                $tags{$1} .= $category . " ";
            }
            else
            {
                $tags{$1} = $category . " ";
            }
        }

        close(DB);
    }
}

# Process articles

my $wc = new HTML::WikiConverter( dialect      => 'Markdown',
                                  link_style   => 'inline',
                                  image_style  => 'inline',
                                  header_style => 'atx');

foreach my $file (@files)
{
    if($file =~ m/\.txt$/i)
    {
        open(ARTICLE, $input_directory . "/" . $file) or die "Unable to open article file";

        # This will store the lines that belong to the actual content
        # of the article.
        my $raw_article;

        my $title  = "";
        my $author = "";
        my $desc   = "";
        my $date   = "";
        my $format = "raw";

        foreach my $line (<ARTICLE>)
        {
            chomp($line);
            $line =~ s/\s+$//; # remove trailing whitespace
            if ($line =~ /TITLE:\s*(.*)/) {
                $title = $1;
                next;
            }
            if ($line =~ /AUTHOR:\s*(.*)/) {
                $author = $1;
                next;
            }
            if ($line =~ /DATE:\s*(.*)/) {
                $date = $1;
                $date =~ tr/@/ /;
                $date = &ParseDateString($date);
                $date = &UnixDate($date, "%Y-%m-%d %T");
#               $date = &UnixDate($date, "%B %d, %Y @ %H:%M %Z");
                next;
            }
            if ($line =~ /DESC:\s*(.*)/) {
                $desc = $1;
                next;
            }
            if ($line =~ /FORMAT:\s*(.*)/) {
                $format = $1;
                next;
            }

            # Article delimiters are hardcoded -- works for me...
            if($line =~ m/BODY\:$/ or $line =~ m/END(-){5}$/ or $line =~ m/(-){5}$/)
            {
                next;
            }

            $raw_article .= $line . "\n";
        }

        close(ARTICLE);

        # The full article is created by prepending the title and appending
        # the stored tags.

        if (0) {
            # Disabled work-in-progress path: rewrite the NanoBlogger entry
            # in place with normalized headers and a Markdown body.
            open(OUT, ">$file");
            print OUT "TITLE: $title\n";
            print OUT "AUTHOR: $author\n";
            print OUT "DATE: $date\n";
#           print OUT "DESC: $desc\n";
            print OUT "DESC: \n";
            print OUT "FORMAT: markdown\n";
#           print OUT "FORMAT: $format\n";
            print OUT "-----\n";
            print OUT "BODY:\n\n";

            if ($format eq 'markdown') {
                print OUT $raw_article;
            } else {
                print OUT $wc->html2wiki($raw_article);
            }

            print OUT "\n\nEND-----\n";
            close(OUT);

        } else {
            my $formatted_article = "[[!meta title=\"" . $title . "\"]]\n" .
                                    "[[!meta date=\"" . $date . "\"]]\n\n";

            if ($format eq 'markdown') {
                $formatted_article .= $raw_article;
            } else { # 'raw' etc
                $formatted_article .= $wc->html2wiki($raw_article) . "\n\n";
            }

            # Only add tags when available
            if(exists($tags{$file}))
            {
                $formatted_article .= "[[!tag " . $tags{$file} . "]]\n";
            }

            # Write the formatted article to a file; the filename is a
            # sanitized version of the article title.
            #
            # Note that subdirectories for each year are created.

            my $year = &UnixDate($date, "%Y");
            mkdir($year);

            open(OUT, ">" . $year . "/" . sanitize($title) . ".mdwn") or die "Unable to open article output file";
            print OUT $formatted_article;
            close(OUT);
        }
    }
}

closedir(IN);

# Sanitizes a filename, following the example of Wordpress:
# * Convert to lowercase
# * Remove non-alphanumeric characters
# * Replace spaces and dashes with underscores
# * Replace adjacent underscores with a single underscore
# * Remove a trailing underscore

sub sanitize
{
    my ($file) = @_;
    my $sanitized = lc($file);

    $sanitized =~ s/[^0-9a-z_\- \t\n\r\f]//g;
    $sanitized =~ s/[\s\-]/_/g;
    $sanitized =~ s/__+/_/g;
    $sanitized =~ s/_$//;

    return($sanitized);
}