work-in-progress changes to the nb2ikiwiki script.

parent 14758322e8
commit 001c5dac09

nb2ikiwiki.pl (new file, 192 lines)

@@ -0,0 +1,192 @@
#!/usr/bin/perl
#
# nb2ikiwiki --- a conversion script from NanoBlogger to ikiwiki
#
# Released under the HOT-BEVERAGE-OF-MY-CHOICE LICENSE: Bastian Rieck wrote
# this script. As long as you retain this notice, you can do whatever you want
# with it. If we meet some day, and you feel like it, you can buy me a hot
# beverage of my choice in return.

use strict;
use warnings;

use HTML::WikiConverter::Markdown;
use Date::Manip;

my $input_directory = "/home/pizza/nanoblogger-3.5-rc1/pizza/data";
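# (The path above points at the author's NanoBlogger data directory; adjust it
# for other installations.)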
opendir(IN, $input_directory) or die "Unable to open input directory";

my @files = readdir(IN);

# Identify database files and store tags for the filenames; the tags are forced
# to become lowercase
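#
# A category database (*.db) is assumed to look roughly like this (inferred
# from the parsing below): the first line holds the category name, and every
# following line names an article file, e.g. "foo.txt>1,3".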

my %tags = ();
foreach my $file (@files)
{
    if($file =~ m/\.db$/i && !($file eq "master.db"))
    {
        open(DB, $input_directory . "/" . $file) or die "Unable to open database file";

        my $category = lc(<DB>); # Category is always the first line of the file
        chomp($category);
        $category =~ tr/ /_/;

        foreach my $article (<DB>)
        {
            # Ignore assignments of multiple tags, i.e. foo.txt>1,3. I only require the filename.
            # Skip lines that do not reference a .txt file at all, so a failed
            # match cannot reuse a stale $1.
            next unless $article =~ m/(.*\.txt)/;

            if(exists($tags{$1}))
            {
                $tags{$1} .= $category . " ";
            }
            else
            {
                $tags{$1} = $category . " ";
            }
        }

        close(DB);
    }
}

# Process articles

my $wc = new HTML::WikiConverter( dialect      => 'Markdown',
                                  link_style   => 'inline',
                                  image_style  => 'inline',
                                  header_style => 'atx');
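# Articles whose FORMAT is not 'markdown' are run through $wc->html2wiki()
# below to turn their stored HTML into Markdown.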

foreach my $file (@files)
{
    if($file =~ m/\.txt$/i)
    {
        open(ARTICLE, $input_directory . "/" . $file) or die "Unable to open article file";

        # This will store the lines that belong to the actual content
        # of the article
        my $raw_article;

        my $title  = "";
        my $author = "";
        my $desc   = "";
        my $date   = "";
        my $format = "raw";

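        # A NanoBlogger article file is assumed to look roughly like this
        # (inferred from the fields parsed below and from the disabled writer
        # further down):
        #
        #   TITLE: ...
        #   AUTHOR: ...
        #   DATE: ...
        #   DESC: ...
        #   FORMAT: raw
        #   -----
        #   BODY:
        #   ...article text...
        #   END-----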
        foreach my $line (<ARTICLE>)
        {
            chomp($line);
            $line =~ s/\s+$//; # remove trailing whitespace
            if ($line =~ /TITLE:\s*(.*)/) {
                $title = $1;
                next;
            }
            if ($line =~ /AUTHOR:\s*(.*)/) {
                $author = $1;
                next;
            }
            if ($line =~ /DATE:\s*(.*)/) {
                $date = $1;
                $date =~ tr/@/ /;
                $date = &ParseDateString($date);
                $date = &UnixDate($date, "%Y-%m-%d %T");
                # $date = &UnixDate($date, "%B %d, %Y @ %H:%M %Z");
                next;
            }
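            # Example (assumed input): a stamp such as "2010-05-01@12:34"
            # becomes "2010-05-01 12:34:00".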
            if ($line =~ /DESC:\s*(.*)/) {
                $desc = $1;
                next;
            }
            if ($line =~ /FORMAT:\s*(.*)/) {
                $format = $1;
                next;
            }

            # Article delimiters are hardcoded -- works for me...
            if($line =~ m/BODY\:$/ or $line =~ m/END(-){5}$/ or $line =~ m/(-){5}$/)
            {
                next;
            }

            $raw_article .= $line . "\n";
        }

        close(ARTICLE);

        # The full article is created by prepending the title and appending
        # the stored tags.

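        # The disabled branch below would write the article back out in
        # NanoBlogger's own format; the active else branch produces the
        # ikiwiki page instead.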
        if (0) {
            open(OUT, ">$file");
            print OUT "TITLE: $title\n";
            print OUT "AUTHOR: $author\n";
            print OUT "DATE: $date\n";
            #print OUT "DESC: $desc\n";
            print OUT "DESC: \n";
            print OUT "FORMAT: markdown\n";
            #print OUT "FORMAT: $format\n";
            print OUT "-----\n";
            print OUT "BODY:\n\n";

            if ($format eq 'markdown') {
                print OUT $raw_article;
            } else {
                print OUT $wc->html2wiki($raw_article);
            }

            print OUT "\n\nEND-----\n";
            close(OUT);

        } else {
            my $formatted_article = "[[!meta title=\"" . $title . "\"]]\n" .
                                    "[[!meta date=\"" . $date . "\"]]\n\n";

            if ($format eq 'markdown') {
                $formatted_article .= $raw_article;
            } else { # 'raw' etc
                $formatted_article .= $wc->html2wiki($raw_article) . "\n\n";
            }

            # Only add tags when available
            if(exists($tags{$file}))
            {
                $formatted_article .= "[[!tag " . $tags{$file} . "]]\n";
            }
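
            # At this point $formatted_article looks roughly like this
            # (values are illustrative):
            #
            #   [[!meta title="Some title"]]
            #   [[!meta date="2010-05-01 12:34:00"]]
            #
            #   ...article body in Markdown...
            #
            #   [[!tag some_category ]]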

            # Write the formatted article to a file; the filename is a sanitized
            # version of the article title.
            #
            # Note that a subdirectory for each year is created.

            my $year = &UnixDate($date, "%Y");
            mkdir($year);

            open(OUT, ">" . $year . "/" . sanitize($title) . ".mdwn") or die "Unable to open article output file";
            print OUT $formatted_article;
            close(OUT);
        }
    }
}

closedir(IN);

# Sanitizes a string for use as a filename, following the example of WordPress:
# * Convert to lowercase
# * Remove non-alphanumeric characters
# * Replace spaces and dashes with underscores
# * Collapse runs of adjacent underscores into a single underscore
# * Remove a trailing underscore
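#
# Example (illustrative): "Hello, World -- Again!" becomes "hello_world_again".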

sub sanitize
{
    my ($file) = @_;
    my $sanitized = lc($file);

    $sanitized =~ s/[^0-9a-z_\- \t\n\r\f]//g;
    $sanitized =~ s/[\s\-]/_/g;
    $sanitized =~ s/__+/_/g;
    $sanitized =~ s/_$//;

    return($sanitized);
}