# slp_misc/nb2ikiwiki.pl

#!/usr/bin/perl
#
# nb2ikiwiki --- a conversion script from NanoBlogger to ikiwiki
#
# Released under the HOT-BEVERAGE-OF-MY-CHOICE LICENSE: Bastian Rieck wrote
# this script. As long you retain this notice, you can do whatever you want
# with it. If we meet some day, and you feel like it, you can buy me a hot
# beverage of my choice in return.

use strict;
use warnings;

use HTML::WikiConverter::Markdown;
use Date::Manip;

# Directory holding the NanoBlogger data files (*.txt articles and *.db
# category databases). It may be passed as the first command-line argument;
# without one, the original hard-coded location is used.
my $input_directory = @ARGV ? $ARGV[0] : "/home/pizza/nanoblogger-3.5-rc1/pizza/data";

# The bareword handle IN is kept on purpose: it is closed by name at the very
# end of the script.
opendir(IN, $input_directory)
	or die "Unable to open input directory '$input_directory': $!";

# The listing is read once and scanned twice further down: first for the
# category databases, then for the articles.
my @files = readdir(IN);

# Identify database files and store tags for the filenames; the tags are forced
# to become lowercase.
#
# %tags maps an article filename (e.g. "foo.txt") to a string made of category
# names, each followed by a single space, so that it can be dropped straight
# into a [[!tag ...]] directive.

my %tags = ();
foreach my $file (@files)
{
	# Every *.db file except the master database describes one category.
	next unless $file =~ m/\.db$/i && $file ne "master.db";

	my $db_path = $input_directory . "/" . $file;
	open(my $db, '<', $db_path)
		or die "Unable to open database file '$db_path': $!";

	# Category is always the first line of the file. An empty database has
	# no category and therefore nothing to contribute.
	my $category = <$db>;
	if(!defined($category))
	{
		close($db);
		next;
	}

	$category = lc($category);
	chomp($category);
	$category =~ tr/ /_/;

	# Read line by line instead of slurping the whole file into a list.
	while(my $article = <$db>)
	{
		# Ignore assignments of multiple tags, i.e. foo.txt>1,3. I only
		# require the filename. Lines that carry no *.txt name (blank
		# lines, for instance) are skipped: reading $1 after a failed
		# match would give undef or a stale capture.
		next unless $article =~ m/(.*\.txt)/;

		# .= on a missing key starts from the empty string without
		# warning, so no exists() check is needed.
		$tags{$1} .= $category . " ";
	}

	close($db);
}

# Process articles

# Converter used for every article that is not already written in Markdown.
# The options request the Markdown dialect with inline links and images and
# 'atx' style headers.
#
# The constructor is called with arrow syntax; the indirect-object form
# ("new Class(...)") is ambiguous to parse and discouraged by perlobj.
my $wc = HTML::WikiConverter->new(	dialect		=> 'Markdown',
					link_style	=> 'inline',
					image_style	=> 'inline',
					header_style	=> 'atx');

# Set to 1 to rewrite every article as a NanoBlogger file (body converted to
# Markdown) in the current directory instead of producing ikiwiki pages.
my $rewrite_as_nanoblogger = 0;

# Convert every *.txt article found in the input directory.
#
# A NanoBlogger article consists of a header (TITLE:, AUTHOR:, DATE:, DESC:,
# FORMAT:), a "-----" separator, a "BODY:" marker, the body, and a closing
# "END-----" line. Header fields are only recognised at the start of a line
# and only before "BODY:", so body text that merely mentions e.g. "DATE:" or
# ends in "-----" is kept as content.
foreach my $file (@files)
{
	next unless $file =~ m/\.txt$/i;

	my $article_path = $input_directory . "/" . $file;
	open(my $article, '<', $article_path)
		or die "Unable to open article file '$article_path': $!";

	# This will store the lines that belong to the actual content
	# of the article
	my $raw_article = "";

	my $title = "";
	my $author = "";
	my $date = "";		# normalised to "YYYY-MM-DD HH:MM:SS"
	my $year = "";		# output subdirectory, taken from the date
	my $format = "raw";

	my $in_body = 0;	# becomes true once the "BODY:" marker was seen

	while(my $line = <$article>)
	{
		chomp($line);
		$line =~ s/\s+$//; # remove trailing whitespaces

		if($in_body)
		{
			# Only the closing delimiter is dropped from the body.
			next if $line =~ m/^END-----$/;

			$raw_article .= $line . "\n";
			next;
		}

		if($line =~ m/^TITLE:\s*(.*)/)
		{
			$title = $1;
			next;
		}
		if($line =~ m/^AUTHOR:\s*(.*)/)
		{
			$author = $1;
			next;
		}
		if($line =~ m/^DATE:\s*(.*)/)
		{
			my $raw_date = $1;
			$raw_date =~ tr/@/ /;

			# ParseDateString yields a false value when it cannot
			# make sense of its input; keep the fields empty then
			# so the problem is detected below.
			my $parsed = ParseDateString($raw_date);
			if($parsed)
			{
				$date = UnixDate($parsed, "%Y-%m-%d %T");
				$year = UnixDate($parsed, "%Y");
			}
			next;
		}

		# The description is read but deliberately not carried over.
		next if $line =~ m/^DESC:/;

		if($line =~ m/^FORMAT:\s*(.*)/)
		{
			$format = $1;
			next;
		}

		# Separator between the header and the body marker
		next if $line =~ m/^-----$/;

		if($line =~ m/^BODY:$/)
		{
			$in_body = 1;
			next;
		}

		# Anything unrecognised ahead of the body is kept as content.
		$raw_article .= $line . "\n";
	}

	close($article);

	$date = "" unless defined($date);
	$year = "" unless defined($year);

	if($rewrite_as_nanoblogger)
	{
		# Write the article back in NanoBlogger format under its
		# original name. DESC is always left empty and FORMAT is
		# always "markdown" because the body is converted below.
		open(my $out, '>', $file)
			or die "Unable to open article output file '$file': $!";

		print $out "TITLE: $title\n";
		print $out "AUTHOR: $author\n";
		print $out "DATE: $date\n";
		print $out "DESC: \n";
		print $out "FORMAT: markdown\n";
		print $out "-----\n";
		print $out "BODY:\n\n";

		if($format eq 'markdown')
		{
			print $out $raw_article;
		}
		else
		{
			print $out $wc->html2wiki($raw_article);
		}

		print $out "\n\nEND-----\n";
		close($out) or die "Unable to write article output file '$file': $!";

		next;
	}

	# Without a usable date there is no year directory to write to; an
	# empty year would make the output path start at "/".
	if($year eq "")
	{
		warn "Skipping '$file': missing or unparsable DATE header\n";
		next;
	}

	# Full article is created by prepending the title and appending
	# the stored tags
	#
	# NOTE(review): a double quote inside $title would end the quoted
	# directive value early -- confirm how ikiwiki expects it to be escaped.
	my $formatted_article = "[[!meta title=\"" . $title . "\"]]\n" .
				"[[!meta date=\"" . $date . "\"]]\n\n";

	if($format eq 'markdown')
	{
		$formatted_article .= $raw_article;
	}
	else # 'raw' etc
	{
		$formatted_article .= $wc->html2wiki($raw_article) . "\n\n";
	}

	# Only add tags when available
	if(exists($tags{$file}))
	{
		$formatted_article .= "[[!tag " . $tags{$file} . "]]\n";
	}

	# Write formatted article to file; the filename is a sanitized
	# version of the article title.
	#
	# Note that subdirectories for each year are created.

	my $page_name = sanitize($title);
	if($page_name eq "")
	{
		# The title is missing or contains nothing that survives
		# sanitizing; fall back to the source file name so that the
		# page does not end up as a hidden ".mdwn" file.
		($page_name = $file) =~ s/\.txt$//i;
	}

	if(!-d $year)
	{
		mkdir($year) or die "Unable to create directory '$year': $!";
	}

	my $out_path = $year . "/" . $page_name . ".mdwn";
	open(my $out, '>', $out_path)
		or die "Unable to open article output file '$out_path': $!";
	print $out $formatted_article;

	# Buffered write errors only surface when the handle is closed.
	close($out) or die "Unable to write article output file '$out_path': $!";
}

closedir(IN);

# Turns an arbitrary title into a string usable as a filename, modelled on the
# way Wordpress builds its post names. The steps, in order:
#	1. Lowercase everything
#	2. Drop every character that is not a-z, 0-9, underscore, dash, space,
#	   tab, newline, carriage return or form feed
#	3. Turn each whitespace character and each dash into an underscore
#	4. Squeeze runs of underscores down to a single one
#	5. Strip one underscore from the end
#
# Takes the title as its only argument and returns the sanitized string,
# which may be empty.

sub sanitize
{
	my $sanitized = lc(shift);

	# Topicalise the string so that the substitutions read as a pipeline.
	for ($sanitized)
	{
		s/[^0-9a-z_\- \t\n\r\f]//g;
		s/[\s\-]/_/g;
		s/__+/_/g;
		s/_$//;
	}

	return $sanitized;
}