slp_misc/nb2ikiwiki.pl

193 lines
4.4 KiB
Perl
Executable File

#!/usr/bin/perl
#
# nb2ikiwiki --- a conversion script from NanoBlogger to ikiwiki
#
# Released under the HOT-BEVERAGE-OF-MY-CHOICE LICENSE: Bastian Rieck wrote
# this script. As long you retain this notice, you can do whatever you want
# with it. If we meet some day, and you feel like it, you can buy me a hot
# beverage of my choice in return.
use strict;
use warnings;
use HTML::WikiConverter::Markdown;
use Date::Manip;
# Directory holding the NanoBlogger data files (*.txt articles and *.db tag
# databases). An optional first command-line argument overrides the built-in
# default, so the script can be used without editing it.
my $input_directory = (defined($ARGV[0]) && length($ARGV[0]))
    ? $ARGV[0]
    : "/home/pizza/nanoblogger-3.5-rc1/pizza/data";

# The bareword handle IN is kept deliberately: it is closed via closedir(IN)
# once all articles have been processed.
opendir(IN, $input_directory)
    or die "Unable to open input directory '$input_directory': $!";

# All directory entries (including "." and ".."); later stages filter this
# list by file extension.
my @files = readdir(IN);
# Identify database files and collect the tags for each article file name.
#
# %tags maps an article file name ("foo.txt") to a space-terminated list of
# category names. Category names are lowercased and their spaces are replaced
# by underscores so that each one forms a single tag word.
my %tags = ();
foreach my $file (@files) {
    next unless $file =~ m/\.db$/i;

    # master.db is excluded from the scan (presumably it is the global
    # article index rather than a category -- confirm against NanoBlogger).
    next if $file eq "master.db";

    my $db_path = $input_directory . "/" . $file;
    open(my $db, '<', $db_path)
        or die "Unable to open database file '$db_path': $!";

    # Category is always the first line of the file. An empty database has
    # no category and therefore nothing to contribute.
    my $category = <$db>;
    if (!defined($category)) {
        close($db);
        next;
    }
    $category = lc($category);
    chomp($category);
    $category =~ tr/ /_/;

    while (my $article = <$db>) {
        # Ignore assignments of multiple tags, i.e. foo.txt>1,3. Only the
        # file name is required. Lines without a ".txt" name are skipped;
        # otherwise $1 would still hold the capture of an earlier line and
        # the category would be credited to the wrong article.
        next unless $article =~ m/(.*\.txt)/;

        # .= creates the entry on first use, so no exists() check is needed.
        $tags{$1} .= $category . " ";
    }
    close($db);
}
# Process articles
#
# Every *.txt entry of the input directory is read as a NanoBlogger article:
# a header of "KEY: value" lines, a "-----" separator, a "BODY:" line, the
# article text and a closing "END-----" line. Each article is then written
# out again, by default as an ikiwiki page inside a per-year subdirectory of
# the current working directory.

# Output mode switch (formerly a literal "if (0)"): when true, each article is
# rewritten as a NanoBlogger entry with a Markdown body, under its original
# file name in the current directory, instead of producing an ikiwiki page.
my $emit_nanoblogger = 0;

my $wc = HTML::WikiConverter->new(
    dialect      => 'Markdown',
    link_style   => 'inline',
    image_style  => 'inline',
    header_style => 'atx',
);

foreach my $file (@files) {
    next unless $file =~ m/\.txt$/i;

    my $article_path = $input_directory . "/" . $file;
    open(my $article_fh, '<', $article_path)
        or die "Unable to open article file '$article_path': $!";

    # $raw_article collects the lines that belong to the actual content of
    # the article; the remaining variables hold the header fields.
    my $raw_article = "";
    my $title       = "";
    my $author      = "";
    my $desc        = "";
    my $date        = "";
    my $format      = "raw";
    my $in_body     = 0;     # becomes true once the "BODY:" line was seen

    while (my $line = <$article_fh>) {
        chomp($line);
        $line =~ s/\s+$//;    # remove trailing whitespace

        if (!$in_body) {
            # Header fields are recognised only at the start of a line and
            # only before "BODY:". Article text that merely mentions e.g.
            # "DATE:" must neither be dropped nor overwrite the metadata.
            if ($line =~ /^TITLE:\s*(.*)/)  { $title  = $1; next; }
            if ($line =~ /^AUTHOR:\s*(.*)/) { $author = $1; next; }
            if ($line =~ /^DESC:\s*(.*)/)   { $desc   = $1; next; }
            if ($line =~ /^FORMAT:\s*(.*)/) { $format = $1; next; }
            if ($line =~ /^DATE:\s*(.*)/) {
                my $stamp = $1;
                $stamp =~ tr/@/ /;    # "@" separates date and time in the input
                my $parsed = ParseDateString($stamp);

                # Normalise to "YYYY-MM-DD HH:MM:SS"; an unparsable date is
                # recorded as "" and handled when the article is written.
                $date = (defined($parsed) && $parsed ne "")
                    ? UnixDate($parsed, "%Y-%m-%d %T")
                    : undef;
                $date = "" unless defined($date);
                next;
            }

            # Article delimiters are hardcoded.
            if ($line =~ /^BODY:$/) {
                $in_body = 1;
                next;
            }
            next if $line =~ /^-{5}$/;    # header/body separator
        }
        elsif ($line =~ /^END-{5}$/) {
            # End-of-article marker. A bare "-----" inside the body is kept,
            # because it is meaningful Markdown (rule / heading underline).
            next;
        }

        $raw_article .= $line . "\n";
    }
    close($article_fh);

    if ($emit_nanoblogger) {
        open(my $out, '>', $file)
            or die "Unable to open article output file '$file': $!";
        print {$out} "TITLE: $title\n";
        print {$out} "AUTHOR: $author\n";
        print {$out} "DATE: $date\n";
        print {$out} "DESC: \n";              # description is intentionally blanked
        print {$out} "FORMAT: markdown\n";    # the body below is always Markdown
        print {$out} "-----\n";
        print {$out} "BODY:\n\n";
        if ($format eq 'markdown') {
            print {$out} $raw_article;
        }
        else {
            print {$out} $wc->html2wiki($raw_article);
        }
        print {$out} "\n\nEND-----\n";
        close($out)
            or die "Unable to write article output file '$file': $!";
        next;
    }

    # The output directory is named after the article's year. Without a
    # usable date the path would start with "/" (file system root), so such
    # articles are reported and skipped instead.
    my $year = ($date ne "") ? UnixDate($date, "%Y") : undef;
    unless (defined($year) && $year ne "") {
        warn "Skipping '$file': missing or unparsable DATE header\n";
        next;
    }

    # Full article is created by prepending the title and appending the
    # stored tags.
    # NOTE(review): a double quote inside $title ends the quoted directive
    # value early -- confirm how ikiwiki expects such titles to be quoted.
    my $formatted_article = "[[!meta title=\"" . $title . "\"]]\n" .
        "[[!meta date=\"" . $date . "\"]]\n\n";
    if ($format eq 'markdown') {
        $formatted_article .= $raw_article;
    }
    else {    # 'raw' etc. is treated as HTML and converted
        $formatted_article .= $wc->html2wiki($raw_article) . "\n\n";
    }

    # Only add tags when available
    if (exists($tags{$file})) {
        $formatted_article .= "[[!tag " . $tags{$file} . "]]\n";
    }

    # The file name is a sanitized version of the article title. If nothing
    # survives sanitizing (empty or purely non-ASCII title), fall back to
    # the source file name so the page is not written to ".mdwn".
    # NOTE: two articles with the same title and year share one output path;
    # the later one overwrites the earlier one.
    my $slug = sanitize($title);
    if ($slug eq "") {
        (my $base = $file) =~ s/\.txt$//i;
        $slug = sanitize($base);
    }

    # Subdirectories for each year are created on demand.
    unless (-d $year) {
        mkdir($year)
            or die "Unable to create output directory '$year': $!";
    }

    my $out_path = $year . "/" . $slug . ".mdwn";
    open(my $out, '>', $out_path)
        or die "Unable to open article output file '$out_path': $!";
    print {$out} $formatted_article;
    close($out)
        or die "Unable to write article output file '$out_path': $!";
}
closedir(IN);
# Turns an arbitrary string (e.g. an article title) into a file-name-safe
# slug, following the example of Wordpress:
#   1. lowercase the input
#   2. drop everything except 0-9, a-z, "_", "-" and whitespace
#   3. turn whitespace and dashes into underscores, collapsing any run of
#      underscores into a single one
#   4. strip one trailing underscore
#
# Takes the string to sanitize and returns the slug, which may be empty.
sub sanitize
{
    my ($file) = @_;
    my $slug = lc($file);

    # Delete (d) every character that is NOT (c) in the allowed set.
    $slug =~ tr/0-9a-z_ \t\n\r\f\-//cd;

    # Map the remaining separators to "_"; the squeeze flag (s) folds
    # adjacent results, including pre-existing underscores, into one.
    $slug =~ tr/_ \t\n\r\f\-/_/s;

    # No newline can be left at this point, so \z is the end of the slug.
    $slug =~ s/_\z//;

    return $slug;
}