summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSolomon Peachy <pizza@shaftnet.org>2013-08-23 22:19:55 -0400
committerSolomon Peachy <pizza@shaftnet.org>2013-08-23 22:19:55 -0400
commit001c5dac095d31fba4ba545ed3902e2ab026792a (patch)
tree51f3682f647dc04797106369a57f78ea0a27d188
parent14758322e8fbffe964a1003da03dad39e4183edd (diff)
downloadslp_misc-001c5dac095d31fba4ba545ed3902e2ab026792a.tar.gz
slp_misc-001c5dac095d31fba4ba545ed3902e2ab026792a.tar.bz2
slp_misc-001c5dac095d31fba4ba545ed3902e2ab026792a.zip
work-in-progress changes to the nb2ikiwiki script.
-rw-r--r--nb2ikiwiki.pl192
1 file changed, 192 insertions, 0 deletions
diff --git a/nb2ikiwiki.pl b/nb2ikiwiki.pl
new file mode 100644
index 0000000..80766c0
--- /dev/null
+++ b/nb2ikiwiki.pl
@@ -0,0 +1,192 @@
#!/usr/bin/perl
#
# nb2ikiwiki --- a conversion script from NanoBlogger to ikiwiki
#
# Released under the HOT-BEVERAGE-OF-MY-CHOICE LICENSE: Bastian Rieck wrote
# this script. As long you retain this notice, you can do whatever you want
# with it. If we meet some day, and you feel like it, you can buy me a hot
# beverage of my choice in return.

use strict;
use warnings;

use HTML::WikiConverter::Markdown;
use Date::Manip;

# NanoBlogger data directory.  It can be overridden with the first
# command-line argument; with no arguments the original hard-coded
# location is used, so existing invocations behave identically.
my $input_directory = @ARGV ? $ARGV[0] : "/home/pizza/nanoblogger-3.5-rc1/pizza/data";

# Bareword handle on purpose: IN stays open for the whole run and is
# closed with closedir(IN) at the bottom of the script.
opendir(IN, $input_directory) or die "Unable to open input directory $input_directory: $!";

# Raw directory listing; the loops below filter by extension (*.db, *.txt).
my @files = readdir(IN);
+
# Identify category database files and record, for every article filename,
# a space-separated string of lowercased category tags.  master.db is the
# overall article index and carries no category, so it is skipped.

my %tags = ();
foreach my $db_file (@files)
{
    next unless $db_file =~ m/\.db$/i;
    next if $db_file eq "master.db";

    open(my $db_fh, '<', "$input_directory/$db_file")
        or die "Unable to open database file $db_file: $!";

    # The category name is always the first line of the file.
    my $category = lc(<$db_fh>);
    chomp($category);
    $category =~ tr/ /_/;   # ikiwiki tag names may not contain spaces

    while (my $entry = <$db_fh>)
    {
        # Entries look like "foo.txt>1,3"; only the filename part is
        # needed, so the multi-tag assignment suffix is ignored.  Lines
        # that do not contain a .txt name (e.g. blank lines) are skipped
        # instead of reusing a stale $1 from a previous match.
        next unless $entry =~ m/(.*\.txt)/;

        # Autovivification makes the exists() check unnecessary.
        $tags{$1} .= $category . " ";
    }

    close($db_fh);
}
+
# Process articles: every *.txt file in the data directory is one
# NanoBlogger article.  The header fields (TITLE/AUTHOR/DATE/DESC/FORMAT)
# are parsed out, the body is converted to Markdown when it is not
# Markdown already, and the result is written to
# <year>/<sanitized-title>.mdwn for ikiwiki.

# The Markdown dialect is provided by HTML::WikiConverter::Markdown,
# loaded above; the converter itself is driven through the base class.
my $wc = HTML::WikiConverter->new( dialect      => 'Markdown',
                                   link_style   => 'inline',
                                   image_style  => 'inline',
                                   header_style => 'atx');

foreach my $file (@files)
{
    next unless $file =~ m/\.txt$/i;

    open(my $article_fh, '<', "$input_directory/$file")
        or die "Unable to open article file $file: $!";

    # Accumulates the actual article content (everything that is not a
    # header field or a delimiter line).  Initialized to "" so an
    # article with an empty body does not trigger undef warnings below.
    my $raw_article = "";

    my $title  = "";
    my $author = "";
    my $desc   = "";
    my $date   = "";
    my $format = "raw";     # NanoBlogger's default body format

    while (my $line = <$article_fh>)
    {
        chomp($line);
        $line =~ s/\s+$//;  # remove trailing whitespace

        if ($line =~ /TITLE:\s*(.*)/)  { $title  = $1; next; }
        if ($line =~ /AUTHOR:\s*(.*)/) { $author = $1; next; }
        if ($line =~ /DATE:\s*(.*)/) {
            $date = $1;
            $date =~ tr/@/ /;   # "May 01, 2010 @ 12:00" -> parseable form
            $date = ParseDateString($date);
            $date = UnixDate($date, "%Y-%m-%d %T");
            next;
        }
        if ($line =~ /DESC:\s*(.*)/)   { $desc   = $1; next; }
        if ($line =~ /FORMAT:\s*(.*)/) { $format = $1; next; }

        # Article delimiters are hardcoded -- works for me...
        next if $line =~ m/BODY:$/ or $line =~ m/END-{5}$/ or $line =~ m/-{5}$/;

        $raw_article .= $line . "\n";
    }

    close($article_fh);

    if (0) {
        # Disabled round-trip mode left over from the original WIP work:
        # rewrites the article back out in NanoBlogger's own format with
        # the body converted to Markdown.  Never executed.
        open(my $out_fh, '>', $file) or die "Unable to open $file: $!";
        print {$out_fh} "TITLE: $title\n";
        print {$out_fh} "AUTHOR: $author\n";
        print {$out_fh} "DATE: $date\n";
        print {$out_fh} "DESC: \n";
        print {$out_fh} "FORMAT: markdown\n";
        print {$out_fh} "-----\n";
        print {$out_fh} "BODY:\n\n";

        if ($format eq 'markdown') {
            print {$out_fh} $raw_article;
        } else {
            print {$out_fh} $wc->html2wiki($raw_article);
        }

        print {$out_fh} "\n\nEND-----\n";
        close($out_fh);
    } else {
        # ikiwiki output: title/date metadata directives followed by the
        # Markdown body.
        my $formatted_article = "[[!meta title=\"" . $title . "\"]]\n" .
                                "[[!meta date=\"" . $date . "\"]]\n\n";

        if ($format eq 'markdown') {
            $formatted_article .= $raw_article;
        } else { # 'raw' (HTML) and anything else gets converted
            $formatted_article .= $wc->html2wiki($raw_article) . "\n\n";
        }

        # Only add tags when the article appeared in a category database.
        if (exists($tags{$file})) {
            $formatted_article .= "[[!tag " . $tags{$file} . "]]\n";
        }

        # Articles are grouped into one subdirectory per year; the
        # filename is a sanitized version of the article title.
        my $year = UnixDate($date, "%Y");
        mkdir($year);   # may already exist; a real failure surfaces on open below

        open(my $out_fh, '>', $year . "/" . sanitize($title) . ".mdwn")
            or die "Unable to open article output file: $!";
        print {$out_fh} $formatted_article;
        close($out_fh);
    }
}

closedir(IN);
+
# Turn an article title into a safe filename, following Wordpress's
# example: lowercase, alphanumerics and underscores only, whitespace and
# dashes collapsed into single underscores, and no trailing underscore.

sub sanitize
{
    my ($title) = @_;

    my $slug = lc($title);

    $slug =~ s/[^0-9a-z_\- \t\n\r\f]//g;    # drop everything but word chars, dashes, whitespace
    $slug =~ tr/-\t\n\r\f /_/;              # dashes and whitespace become underscores
    $slug =~ tr/_//s;                       # squeeze runs of underscores to one
    $slug =~ s/_\z//;                       # no trailing underscore

    return $slug;
}