Miscellaneous code that doesn't have a better home.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

158 lines
3.7 KiB

#!/usr/bin/perl
#
# nb2nikola --- a conversion script from NanoBlogger to nikola
#
# Released under the HOT-BEVERAGE-OF-MY-CHOICE LICENSE: Solomon Peachy wrote
# this script. As long you retain this notice, you can do whatever you want
# with it. If we meet some day, and you feel like it, you can buy me a hot
# beverage of my choice in return.
use strict;
use warnings;
use HTML::WikiConverter::Markdown;
use Date::Manip;
# Directory that is scanned for NanoBlogger article (*.txt) and category
# database (*.db) files.
my $input_directory = "/home/pizza/nanoblogger-3.5-rc1/pizza/data";
# The IN dirhandle is left open here; it is closed at the end of the script.
opendir(IN, $input_directory)
    or die "Unable to open input directory '$input_directory': $!";
my @files = readdir(IN);

# Identify database files and store tags for the filenames; the tags are forced
# to become lowercase.
# %tags maps an article filename (foo.txt) to a comma-separated list of the
# categories whose database file mentions it.
my %tags = ();
foreach my $file (@files)
{
    # Only category databases are of interest; master.db is skipped.
    next unless $file =~ m/\.db$/i;
    next if $file eq "master.db";

    my $db_path = $input_directory . "/" . $file;
    open(my $db, '<', $db_path)
        or die "Unable to open database file '$db_path': $!";

    # Category is always the first line of the file
    my $category = <$db>;
    if (!defined($category))
    {
        # Empty database file: there is no category and no article to tag.
        close($db);
        next;
    }
    $category = lc($category);
    chomp($category);
    $category =~ tr/. /_/; # dots and spaces both become underscores

    while (my $article = <$db>)
    {
        # Ignore assignments of multiple tags, i.e. foo.txt>1,3. I only
        # require the filename. Lines that do not name a .txt file (blank
        # lines, for instance) are skipped instead of reusing a stale $1.
        next unless $article =~ m/(.*\.txt)/;
        my $article_name = $1;

        if (exists($tags{$article_name}))
        {
            $tags{$article_name} .= ", $category";
        }
        else
        {
            $tags{$article_name} = $category;
        }
    }
    close($db);
}
# Process articles: every *.txt entry of the input directory is parsed and
# written to the current directory as 00-<sanitized title>.md, with a metadata
# comment block on top of the body.
my $wc = HTML::WikiConverter->new(
    dialect      => 'Markdown',
    link_style   => 'inline',
    image_style  => 'inline',
    header_style => 'atx',
);
foreach my $file (@files)
{
    next unless $file =~ m/\.txt$/i;

    my $article_path = $input_directory . "/" . $file;
    open(my $article, '<', $article_path)
        or die "Unable to open article file '$article_path': $!";

    # This will store the lines that belong to the actual content
    # of the article
    my $raw_article = "";
    my $title       = "";
    my $desc        = "";
    my $date        = "";
    my $format      = "raw";
    my $slug        = "";
    # Becomes true once the BODY: marker has been seen. Header keywords are
    # only honoured before that point, so body text such as "UPDATE: ..."
    # (which contains "DATE:") can no longer clobber the metadata.
    # NOTE(review): assumes headers precede BODY: and the entry ends with
    # END----- -- confirm against the NanoBlogger version in use.
    my $in_body = 0;

    while (my $line = <$article>)
    {
        chomp($line);
        $line =~ s/\s+$//; # remove trailing whitespaces

        if ($in_body)
        {
            # Inside the body only the end marker is dropped; every other
            # line, including a line made of dashes, is content.
            next if $line =~ m/^END-{5}$/;
        }
        else
        {
            if ($line =~ m/^TITLE:\s*(.*)/)
            {
                $title = $1;
                $slug  = sanitize($title); # sanitize() lowercases itself
                next;
            }
            # The author is not part of the generated header; the line is
            # only consumed so that it does not end up in the body.
            next if $line =~ m/^AUTHOR:/;
            if ($line =~ m/^DATE:\s*(.*)/)
            {
                my $raw_date = $1;
                $raw_date =~ tr/@/ /;
                # Other formats tried earlier: "%Y-%m-%d %T" and
                # "%B %d, %Y @ %H:%M %Z".
                my $formatted = UnixDate(ParseDateString($raw_date), "%Y/%m/%d %T");
                if (defined($formatted) && length($formatted))
                {
                    $date = $formatted;
                }
                else
                {
                    # Keep the original text rather than emitting nothing.
                    warn "Unable to parse date '$raw_date' in '$article_path'\n";
                    $date = $raw_date;
                }
                next;
            }
            if ($line =~ m/^DESC:\s*(.*)/)
            {
                $desc = $1;
                next;
            }
            if ($line =~ m/^FORMAT:\s*(.*)/)
            {
                $format = $1;
                next;
            }
            # Article delimiters are hardcoded -- works for me...
            if ($line =~ m/^BODY:$/)
            {
                $in_body = 1;
                next;
            }
            next if $line =~ m/^-{5,}$/; # separator between header and body
        }

        $raw_article .= $line . "\n";
    }
    close($article);

    # Full article is created by prepending the title and appending
    # the stored tags
    my $out_path = "00-" . sanitize($title) . ".md";
    open(my $out, '>', $out_path)
        or die "Unable to create output file '$out_path': $!";

    # Articles that no category database mentions simply get no tags.
    my $tag_list = defined($tags{$file}) ? $tags{$file} : "";

    print {$out} "<!--\n";
    print {$out} ".. link: \n";
    print {$out} ".. description: $desc\n";
    print {$out} ".. tags: $tag_list\n";
    print {$out} ".. date: $date\n";
    print {$out} ".. title: $title\n";
    print {$out} ".. slug: $slug\n";
    print {$out} ".. type: text\n";
    print {$out} "-->\n\n";

    if ($format eq 'markdown' || $raw_article !~ m/\S/)
    {
        # Already Markdown, or nothing to convert: emit the body verbatim.
        print {$out} $raw_article;
    }
    else
    {
        print {$out} $wc->html2wiki($raw_article);
    }

    # Buffered write errors only surface at close time.
    close($out) or die "Unable to write output file '$out_path': $!";
}
closedir(IN);
# Turns an arbitrary string (an article title, for example) into a token that
# is safe to use in a filename, following the example of Wordpress:
#   1. lowercase the whole string
#   2. drop every character that is not 0-9, a-z, underscore, dash, space,
#      tab, newline, carriage return or form feed
#   3. turn each remaining whitespace character or dash into an underscore
#   4. squeeze runs of underscores into a single underscore
#   5. strip the trailing underscore, if any (a leading one is kept)
# Takes the string as its only argument and returns the sanitized copy.
sub sanitize
{
    my $name = lc($_[0]);

    # Alias $_ to the working copy so the substitutions read as a pipeline.
    for ($name)
    {
        s/[^0-9a-z_\- \t\n\r\f]//g;
        s/[\s\-]/_/g;
        s/__+/_/g;
        s/_$//;
    }

    return $name;
}