#!/usr/bin/perl 
#
# Developed by travel-log.net
# February 11, 2021
#
# You can use this script to analyze your goo blog backup file,
# and generate a shell script to download your image files
# as well as convert your backup file so that the html description
# points to your new image urls in your WordPress domain.
#
# Before running, you need to edit the following line near the top of the
# code (it is marked "Edit this line before running"):
#    $newdomain= "https:\/\/travel-log.net";
# so that it points to your new WordPress domain.
# That is, change "travel-log.net" to your own domain name.
#
# Run by typing
#  > perl convertBlog.pl
# after placing gooBlog.txt, backup file of goo blog, in your working directory.
#
# This converts gooBlog.txt to gooBlog_converted.txt and produces download.sh
# Urls of image files in gooBlog_converted.txt will be replaced with your
# new WordPress domain.
#
# All relevant image files will be downloaded from goo blog by typing
#  > sh download.sh
# Images will be stored in the directories user_image and thumbnail.
# You need to create a directory under your domain:
#    http(s)://[YOUR_DOMAIN]/wp-content/uploads/gooblog/
# and place these directories there.

use strict;

# Edit this line before running.
my $newdomain= "https:\/\/travel-log.net";

# Input backup, converted copy, and the generated download script.
# The handles stay package-level barewords because DOWNLOADSH is handed to
# writeDownloadSh as a glob.  Three-argument open with an explicit check so a
# missing or unwritable file stops the run instead of silently producing
# empty output.
open(INTXT, '<', "gooBlog.txt")
    or die "Cannot open gooBlog.txt for reading: $!\n";
open(OUTTXT, '>', "gooBlog_converted.txt")
    or die "Cannot open gooBlog_converted.txt for writing: $!\n";
open(DOWNLOADSH, '>', "download.sh")
    or die "Cannot open download.sh for writing: $!\n";

# list of directories to mkdir by download.sh
# ($ndirnames counts the entries of @dirnames that are in use)
our $ndirnames=0;
our @dirnames= ();

# One pattern per supported image type, tried in this order for every line.
# Dots are escaped so that they only match a literal "." in the host name
# and in front of the extension.
my @image_patterns;
for my $ext (qw(jpg gif png)) {
    push(@image_patterns,
         qr!https?://blogimg\.goo\.ne\.jp/\w{1,20}/\w{2}/\w{2}/\w*\.$ext!);
}

my $ntitle= 0;
while (my $line=<INTXT>) {
    chomp($line);

    # print article title
    # (print, not printf: the text may contain "%" and must not be treated
    # as a format string)
    if ($line=~ /TITLE:/) {
        print("$ntitle  $line\n");
        $ntitle++;
    }

    # Scan an untouched copy of the line while $line itself is rewritten,
    # so the edits cannot disturb the position of the global match.
    my $str=$line;
    for my $pattern (@image_patterns) {
        while ($str=~ /($pattern)/g) {
            my $url= $1;
            print("    $url\n");
            writeDownloadSh(*DOWNLOADSH, $url);
            $line=editLine($line, $url);
        }
    }

    # print, not printf: a "%" in the blog text has to reach the converted
    # file unchanged.
    print(OUTTXT "$line\n");
}
close(INTXT);
# Buffered write errors only surface at close time, so check both outputs.
close(OUTTXT)
    or die "Cannot finish writing gooBlog_converted.txt: $!\n";
close(DOWNLOADSH)
    or die "Cannot finish writing download.sh: $!\n";



#
# example of target name
# https://travel-log.net/wp-content/uploads/gooblog/user_image/cup-of-espresso.JPG
#

# Builds the URL an image will have on the new WordPress site.
#
# Argument: the original image URL.  Split on "/", it yields
# ("scheme:", "", host, dir, xx, yy, file); only dir (field 3) and
# file (field 6) are carried over.
# Returns: "<newdomain>/wp-content/uploads/gooblog/<dir>/<file>", where
# <newdomain> is the file-level $newdomain setting.
sub getTargetName {
    my $orgname = shift;
    my $verbose = 0;

    printf("getTargetName>   orgname: ${orgname}\n") if $verbose;

    my @parts = split(/\//, $orgname);
    printf("getTargetName>   ${parts[3]}  ${parts[4]}  ${parts[5]}  ${parts[6]}\n") if $verbose;

    # The two intermediate directories (fields 4 and 5) are dropped on purpose:
    # the new location is flat below <dir>.
    my $targetname = join("/",
                          $newdomain, "wp-content", "uploads", "gooblog",
                          $parts[3], $parts[6]);
    printf("getTargetName>   targetname: ${targetname}\n") if $verbose;

    return $targetname;
}

# Extracts the bare file name from an image URL, dropping the scheme, the
# host and every directory component.
#
# Argument: the original image URL.
# Returns: field 6 of the URL split on "/" (always a defined string; it is
# empty when the URL has fewer components).
sub getFileName {
    my $orgname = shift;
    my $verbose = 0;

    printf("getFileName>   orgname: ${orgname}\n") if $verbose;

    # "scheme://host/dir/xx/yy/file" splits into
    # ("scheme:", "", host, dir, xx, yy, file).
    my @parts = split(/\//, $orgname);
    printf("getFileName>  ${parts[3]}  ${parts[4]}  ${parts[5]}  ${parts[6]}\n") if $verbose;

    # Interpolation turns a missing field into "" rather than undef.
    my $filename = "${parts[6]}";
    printf("getFileName>   filename: ${filename}\n") if $verbose;

    return $filename;
}

# Returns the directory name of an image URL, usually either user_image or
# thumbnail.
#
# Argument: the original image URL.
# Returns: field 3 of the URL split on "/", i.e. the first path component
# after the host; undef when the URL has no such component.
sub getDirName {
    my ($orgname)= @_;
    my $verbose= 0;

    # Debug traces use print rather than printf so that a "%" in the URL is
    # shown as-is instead of being taken as a format directive.
    if ($verbose) {print("getDirName>   orgname: ${orgname}\n");}

    # "scheme://host/dir/xx/yy/file" splits into
    # ("scheme:", "", host, dir, xx, yy, file).
    my @subdirnames=split(/\//, $orgname);
    if ($verbose) {print("getDirName>  ${subdirnames[3]}  ${subdirnames[4]}  ${subdirnames[5]}  ${subdirnames[6]}\n");}

    my $dirname= $subdirnames[3];
    # The trace is labelled with this sub's own name (it used to claim to
    # come from getFileName).
    if ($verbose) {print("getDirName>   dirname: ${dirname}\n");}
    return $dirname;
}

# Appends the shell commands that download one image to the download script.
#
# Arguments:
#   1. file handle (glob or reference) of the script being generated;
#      when it is not defined, the package handle DOWNLOADSH is used
#   2. full URL of the image on the goo blog image server
#
# The image's directory name (see getDirName) is looked up in the file-level
# list @dirnames / $ndirnames.  The first time a directory is seen it is
# recorded there and a guarded mkdir is emitted ahead of the download.
# The download itself is "wget <url>" followed by moving the fetched file
# into that directory.
#
# The URL and the names derived from it are written into the script
# unquoted, so the URL must not contain shell metacharacters.
#
# Returns 1 (nothing written) when the URL is empty, 0 otherwise.
sub writeDownloadSh {
    my $verbose= 0;
    my $funcname= "writeDownloadSh";

    my ($fh, $httpstring)= @_;
    $fh= \*DOWNLOADSH unless defined($fh);
    if (!defined($httpstring) || $httpstring eq "")  {return 1}

    # analyze the subdirectory and file names from https full path
    my $orgname1= $httpstring;
    my $targetname1= getTargetName($orgname1);
    my $filename1= getFileName($orgname1);
    my $dirname1= getDirName($orgname1);
    if ($verbose) {print("$funcname>       org: $orgname1\n");}
    if ($verbose) {print("$funcname>    target: $targetname1\n");}
    if ($verbose) {print("$funcname>      file: $filename1\n");}
    if ($verbose) {print("$funcname>       dir: $dirname1\n");}

    # check if dirname1 is the first occurrence
    # (only the first $ndirnames entries of @dirnames are in use)
    my $already_seen= grep { $dirname1 eq $_ } @dirnames[0 .. $ndirnames - 1];

    if (!$already_seen) {
        $dirnames[$ndirnames]= $dirname1;
        $ndirnames++;

        # Test the directory name itself.  Wrapping it in literal braces
        # would make the shell look for a path called "{name}", which never
        # exists, so the guard would always run mkdir.
        print {$fh} "if [ ! -e $dirname1 ]; then\n";
        print {$fh} "  echo making dir:  $dirname1; \n";
        print {$fh} "  mkdir $dirname1\n";
        print {$fh} "fi\n";
    }

    # wget saves into the current directory; the file is then moved into
    # the directory that mirrors the URL's first path component.
    print {$fh} "echo downloading $orgname1...\n";
    print {$fh} "  wget $orgname1\n";
    print {$fh} "  mv $filename1 $dirname1\n";

    return 0;
}

# Rewrites one occurrence of an image URL in a line of the backup file so
# that it points at the new WordPress location (see getTargetName).
#
# Arguments:
#   1. the line of text to convert
#   2. the original image URL found in that line
#
# Only the first occurrence is replaced; call once per occurrence when the
# same URL appears several times.
#
# Returns the converted line, or the line unchanged when the URL is empty.
sub editLine {
    my $funcname= "editLine";
    my $verbose= 0;

    my ($line, $orgurl)= @_;
    if ($verbose) {print("$funcname>  org line:  $line\n");}

    if (!defined($orgurl) || $orgurl eq "")  {return $line}
    my $targeturl= getTargetName($orgurl);

    # \Q..\E makes the URL match literally; without it every "." in the URL
    # would act as a regex wildcard.
    $line=~s/\Q$orgurl\E/$targeturl/;
    if ($verbose) {print("$funcname>  new line:  $line\n");}

    return $line;
}
