159 lines
3.7 KiB
Perl
Executable File
159 lines
3.7 KiB
Perl
Executable File
#!/usr/bin/perl
|
|
#
|
|
# nb2nikola --- a conversion script from NanoBlogger to nikola
|
|
#
|
|
# Released under the HOT-BEVERAGE-OF-MY-CHOICE LICENSE: Solomon Peachy wrote
|
|
# this script. As long you retain this notice, you can do whatever you want
|
|
# with it. If we meet some day, and you feel like it, you can buy me a hot
|
|
# beverage of my choice in return.
|
|
|
|
use strict;
|
|
use warnings;
|
|
|
|
use HTML::WikiConverter::Markdown;
|
|
use Date::Manip;
|
|
|
|
# Directory holding the NanoBlogger data files (*.db tag databases and *.txt
# articles).  It may be given as the first command-line argument; without an
# argument the original hard-coded location is used.
my $input_directory = @ARGV
    ? $ARGV[0]
    : "/home/pizza/nanoblogger-3.5-rc1/pizza/data";

# The IN handle is left open here; it is closed at the end of the script.
opendir(IN, $input_directory)
    or die "Unable to open input directory '$input_directory': $!";

# Snapshot of every directory entry; the loops below filter it by extension.
my @files = readdir(IN);
|
|
|
|
# Identify database files (*.db, except master.db) and store tags for the
# filenames; the tags are forced to become lowercase and have dots and spaces
# replaced by underscores.
#
# %tags maps an article file name ("foo.txt") to the categories it belongs
# to, as a string in which every category name is followed by one space.

my %tags = ();
foreach my $file (@files)
{
    next unless $file =~ m/\.db$/i;
    next if $file eq "master.db";

    # Three-argument open: the file name is never interpreted as a mode.
    my $db_path = $input_directory . "/" . $file;
    open(my $db, '<', $db_path)
        or die "Unable to open database file '$db_path': $!";

    # Category is always the first line of the file.  An empty database has
    # no category and no articles, so there is nothing to record.
    my $category = <$db>;
    if (!defined($category))
    {
        close($db);
        next;
    }

    $category = lc($category);
    chomp($category);
    $category =~ tr/. /_/;

    while (defined(my $article = <$db>))
    {
        # Ignore assignments of multiple tags, i.e. foo.txt>1,3. Only the
        # filename is required.  Lines that do not name a .txt file are
        # skipped, so a stale or undefined $1 is never used as a hash key.
        next unless $article =~ m/(.*\.txt)/;

        # .= on a missing key starts from the empty string without warning.
        $tags{$1} .= $category . " ";
    }

    close($db);
}
|
|
|
|
# Process articles
#
# Every *.txt entry of @files is read line by line: the TITLE, AUTHOR, DATE,
# DESC and FORMAT header fields are picked up, the hardcoded delimiter lines
# are dropped and everything else is collected as the article body.  The
# result is written to the current working directory as
# 00-<sanitized title>.md, starting with a nikola metadata comment.  Bodies
# whose FORMAT is not 'markdown' are converted from HTML to Markdown first.

my $wc = HTML::WikiConverter->new( dialect      => 'Markdown',
                                   link_style   => 'inline',
                                   image_style  => 'inline',
                                   header_style => 'atx');

foreach my $file (@files)
{
    next unless $file =~ m/\.txt$/i;

    my $article_path = $input_directory . "/" . $file;
    open(my $article, '<', $article_path)
        or die "Unable to open article file '$article_path': $!";

    # This will store the lines that belong to the actual content of the
    # article.  It starts out empty so an article without body lines never
    # hands an undefined value to print or to the converter.
    my $raw_article = "";

    my $title  = "";
    my $desc   = "";
    my $date   = "";
    my $format = "raw";

    # Header fields are only recognised until the BODY: marker has been
    # seen, and only at the start of a line.  Otherwise body text such as
    # "UPDATE: ..." would be mistaken for a DATE: header and lost.
    my $in_body = 0;

    while (defined(my $line = <$article>))
    {
        chomp($line);
        $line =~ s/\s+$//;    # remove trailing whitespaces

        if (!$in_body)
        {
            if ($line =~ /^\s*TITLE:\s*(.*)/)
            {
                $title = $1;
                next;
            }
            if ($line =~ /^\s*AUTHOR:\s*(.*)/)
            {
                # The metadata written below has no author field, so the
                # line is consumed and its value discarded.
                next;
            }
            if ($line =~ /^\s*DATE:\s*(.*)/)
            {
                my $raw_date = $1;
                $raw_date =~ tr/@/ /;

                # Alternative output formats that were tried:
                #   "%Y-%m-%d %T"
                #   "%B %d, %Y @ %H:%M %Z"
                my $parsed    = ParseDateString($raw_date);
                my $formatted = UnixDate($parsed, "%Y/%m/%d %T");

                if (defined($formatted) && length($formatted))
                {
                    $date = $formatted;
                }
                else
                {
                    warn "Unable to parse date '$raw_date' in '$article_path'\n";
                    $date = "";
                }
                next;
            }
            if ($line =~ /^\s*DESC:\s*(.*)/)
            {
                $desc = $1;
                next;
            }
            if ($line =~ /^\s*FORMAT:\s*(.*)/)
            {
                $format = $1;
                next;
            }
        }

        # Article delimiters are hardcoded -- works for me...
        if ($line =~ m/BODY\:$/)
        {
            $in_body = 1;
            next;
        }
        if ($line =~ m/END(-){5}$/ or $line =~ m/(-){5}$/)
        {
            next;
        }

        $raw_article .= $line . "\n";
    }

    close($article);

    # Full article is created by prepending the metadata (title, tags, ...)
    # to the body.

    # sanitize() lowercases its argument itself, so the slug and the output
    # file name share the same sanitized title.
    my $slug     = sanitize($title);
    my $out_path = "00-" . $slug . ".md";

    # Articles that appear in no tag database simply get an empty tag list.
    my $article_tags = defined($tags{$file}) ? $tags{$file} : "";

    open(my $out, '>', $out_path)
        or die "Unable to open output file '$out_path': $!";

    print {$out} "<!--\n";
    print {$out} ".. link: \n";
    print {$out} ".. description: $desc\n";
    print {$out} ".. tags: $article_tags\n";
    print {$out} ".. date: $date\n";
    print {$out} ".. title: $title\n";
    print {$out} ".. slug: $slug\n";
    print {$out} ".. type: text\n";
    print {$out} "-->\n\n";

    if ($format eq 'markdown')
    {
        print {$out} $raw_article;
    }
    else
    {
        print {$out} $wc->html2wiki($raw_article);
    }

    # Buffered write errors only surface when the handle is closed.
    close($out)
        or die "Unable to write output file '$out_path': $!";
}

closedir(IN);
|
|
|
|
# Sanitizes a filename, following the example of Wordpress:
#  * Convert to lowercase
#  * Drop every character other than a-z, 0-9, underscore, dash and
#    whitespace (space, tab, newline, carriage return, form feed)
#  * Turn whitespace and dashes into underscores, collapsing any run of
#    underscores into a single one
#  * Remove a trailing underscore
#
# Takes the string to sanitize and returns the sanitized copy; the argument
# itself is not modified.

sub sanitize
{
    my ($file) = @_;

    # Lowercase a copy, then delete (d) everything outside (c) the allowed set.
    (my $name = lc($file)) =~ tr/0-9a-z_\- \t\n\r\f//cd;

    # Map separators to underscores; /s squeezes each resulting run of
    # underscores (existing ones included) down to one.
    $name =~ tr/ \t\n\r\f\-_/_/s;

    # No newline can be left at this point, so \z is the end of the name.
    $name =~ s/_\z//;

    return($name);
}
|