work-in-progress changes to the nb2ikiwiki script.

parent 14758322e8
commit 001c5dac09

nb2ikiwiki.pl (new file, 192 lines)

@@ -0,0 +1,192 @@
#!/usr/bin/perl
#
# nb2ikiwiki --- a conversion script from NanoBlogger to ikiwiki
#
# Released under the HOT-BEVERAGE-OF-MY-CHOICE LICENSE: Bastian Rieck wrote
# this script. As long as you retain this notice, you can do whatever you want
# with it. If we meet some day, and you feel like it, you can buy me a hot
# beverage of my choice in return.

use strict;
use warnings;

use HTML::WikiConverter::Markdown;
use Date::Manip;

my $input_directory = "/home/pizza/nanoblogger-3.5-rc1/pizza/data";
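# (The path above points at the author's NanoBlogger data directory; adjust it
# for other installations.)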
opendir(IN, $input_directory) or die "Unable to open input directory";

my @files = readdir(IN);

# Identify database files and store tags for the filenames; the tags are forced
# to become lowercase
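#
# A category database (*.db) is assumed to look roughly like this (inferred
# from the parsing below): the first line holds the category name, and every
# following line names an article file, e.g. "foo.txt>1,3".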

my %tags = ();
foreach my $file (@files)
{
    if($file =~ m/\.db$/i && !($file eq "master.db"))
    {
        open(DB, $input_directory . "/" . $file) or die "Unable to open database file";

        my $category = lc(<DB>); # Category is always the first line of the file
        chomp($category);
        $category =~ tr/ /_/;

        foreach my $article (<DB>)
        {
            # Ignore assignments of multiple tags, i.e. foo.txt>1,3. I only require the filename.
            # Skip lines that do not reference a .txt file at all, so a failed
            # match cannot reuse a stale $1.
            next unless $article =~ m/(.*\.txt)/;

            if(exists($tags{$1}))
            {
                $tags{$1} .= $category . " ";
            }
            else
            {
                $tags{$1} = $category . " ";
            }
        }

        close(DB);
    }
}

# Process articles

my $wc = new HTML::WikiConverter( dialect      => 'Markdown',
                                  link_style   => 'inline',
                                  image_style  => 'inline',
                                  header_style => 'atx');
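# Articles whose FORMAT is not 'markdown' are run through $wc->html2wiki()
# below to turn their stored HTML into Markdown.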

foreach my $file (@files)
{
    if($file =~ m/\.txt$/i)
    {
        open(ARTICLE, $input_directory . "/" . $file) or die "Unable to open article file";

        # This will store the lines that belong to the actual content
        # of the article
        my $raw_article;

        my $title  = "";
        my $author = "";
        my $desc   = "";
        my $date   = "";
        my $format = "raw";

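        # A NanoBlogger article file is assumed to look roughly like this
        # (inferred from the fields parsed below and from the disabled writer
        # further down):
        #
        #   TITLE: ...
        #   AUTHOR: ...
        #   DATE: ...
        #   DESC: ...
        #   FORMAT: raw
        #   -----
        #   BODY:
        #   ...article text...
        #   END-----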
        foreach my $line (<ARTICLE>)
        {
            chomp($line);
            $line =~ s/\s+$//; # remove trailing whitespace
            if ($line =~ /TITLE:\s*(.*)/) {
                $title = $1;
                next;
            }
            if ($line =~ /AUTHOR:\s*(.*)/) {
                $author = $1;
                next;
            }
            if ($line =~ /DATE:\s*(.*)/) {
                $date = $1;
                $date =~ tr/@/ /;
                $date = &ParseDateString($date);
                $date = &UnixDate($date, "%Y-%m-%d %T");
                # $date = &UnixDate($date, "%B %d, %Y @ %H:%M %Z");
                next;
            }
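            # Example (assumed input): a stamp such as "2010-05-01@12:34"
            # becomes "2010-05-01 12:34:00".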
            if ($line =~ /DESC:\s*(.*)/) {
                $desc = $1;
                next;
            }
            if ($line =~ /FORMAT:\s*(.*)/) {
                $format = $1;
                next;
            }

            # Article delimiters are hardcoded -- works for me...
            if($line =~ m/BODY\:$/ or $line =~ m/END(-){5}$/ or $line =~ m/(-){5}$/)
            {
                next;
            }

            $raw_article .= $line . "\n";
        }

        close(ARTICLE);

        # The full article is created by prepending the title and appending
        # the stored tags.

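        # The disabled branch below would write the article back out in
        # NanoBlogger's own format; the active else branch produces the
        # ikiwiki page instead.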
        if (0) {
            open(OUT, ">$file");
            print OUT "TITLE: $title\n";
            print OUT "AUTHOR: $author\n";
            print OUT "DATE: $date\n";
            #print OUT "DESC: $desc\n";
            print OUT "DESC: \n";
            print OUT "FORMAT: markdown\n";
            #print OUT "FORMAT: $format\n";
            print OUT "-----\n";
            print OUT "BODY:\n\n";

            if ($format eq 'markdown') {
                print OUT $raw_article;
            } else {
                print OUT $wc->html2wiki($raw_article);
            }

            print OUT "\n\nEND-----\n";
            close(OUT);

        } else {
            my $formatted_article = "[[!meta title=\"" . $title . "\"]]\n" .
                                    "[[!meta date=\"" . $date . "\"]]\n\n";

            if ($format eq 'markdown') {
                $formatted_article .= $raw_article;
            } else { # 'raw' etc
                $formatted_article .= $wc->html2wiki($raw_article) . "\n\n";
            }

            # Only add tags when available
            if(exists($tags{$file}))
            {
                $formatted_article .= "[[!tag " . $tags{$file} . "]]\n";
            }
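
            # At this point $formatted_article looks roughly like this
            # (values are illustrative):
            #
            #   [[!meta title="Some title"]]
            #   [[!meta date="2010-05-01 12:34:00"]]
            #
            #   ...article body in Markdown...
            #
            #   [[!tag some_category ]]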

            # Write the formatted article to a file; the filename is a sanitized
            # version of the article title.
            #
            # Note that a subdirectory for each year is created.

            my $year = &UnixDate($date, "%Y");
            mkdir($year);

            open(OUT, ">" . $year . "/" . sanitize($title) . ".mdwn") or die "Unable to open article output file";
            print OUT $formatted_article;
            close(OUT);
        }
    }
}

closedir(IN);

# Sanitizes a string for use as a filename, following the example of WordPress:
# * Convert to lowercase
# * Remove non-alphanumeric characters
# * Replace spaces and dashes with underscores
# * Collapse runs of adjacent underscores into a single underscore
# * Remove a trailing underscore
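#
# Example (illustrative): "Hello, World -- Again!" becomes "hello_world_again".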

sub sanitize
{
    my ($file) = @_;
    my $sanitized = lc($file);

    $sanitized =~ s/[^0-9a-z_\- \t\n\r\f]//g;
    $sanitized =~ s/[\s\-]/_/g;
    $sanitized =~ s/__+/_/g;
    $sanitized =~ s/_$//;

    return($sanitized);
}