commit dc4bdeb43e8a061e1fa33684ef8c4862d194c38d Author: Fabian Schlenz Date: Tue Dec 8 19:37:51 2020 +0100 Initial commit. diff --git a/README.md b/README.md new file mode 100644 index 0000000..94bc7aa --- /dev/null +++ b/README.md @@ -0,0 +1,34 @@ +# Convert your Jekyll blog to Wordpress + +This is a small collection of scripts to help you move +your blog from Jekyll to Wordpress. + +This is pretty dirty code and heavily tailored toward +my own needs, so your mileage may vary. But perhaps +you'll find it at least a bit helpful. + +## Features +This will help you import: +* Posts +* Tags +* Images + +## Usage + +1. Read the scripts and modify them as needed. Change +the username, for example. ;-) There are quite a few +things in there you'll want to change. +2. Copy wp_export.xml to your Jekyll data folder and +run `jekyll build`. This will create an export of all +your posts in `_site/wp_export.xml`. Do not import this +file in Wordpress yet! +3. Run extract_media.rb to extract all image URLs used +in the export: `ruby extract_media.rb < _site/wp_export.xml +> wp_export.images.xml`. +4. Run finalize_export.rb to modify the image links: +`ruby finalize_export.rb < _site/wp_export.xml > wp_export.posts.xml`. +5. Use the Wordpress import plugin to import `wp_export.posts.xml` +(do not activate the `import attachments` option). +6. Use the Wordpress import plugin to import `wp_export.images.xml`, +this time activating the option to `import attachments`. 
+ diff --git a/extract_media.rb b/extract_media.rb new file mode 100644 index 0000000..ee646a4 --- /dev/null +++ b/extract_media.rb @@ -0,0 +1,77 @@ +puts <<-XML + + + + + + fabianonline.de + https://blog.fabianonline.de + Eine weitere Wordpress-Test Websites Website + Tue, 08 Dec 2020 12:42:20 +0000 + de-DE + 1.2 + https://blog2.fabianonline.de + https://blog2.fabianonline.de + + + 2 + + + + + + + + https://wordpress.org/?v=5.5.3 +XML + +# Look at all strings starting with src="/uploads/ or href="/uploads/ - you might want to change this if +# your images are saved at another place. +images = STDIN.read.scan(/(?:src|href)="(\/uploads\/.+?)"/).map(&:first).uniq + +# Pay close attention to the attachment_url in this block: This is the path where Wordpress will try to load +# the image from. This should be correct and accessible for wordpress. +# The date is given as Jan 1, 1970 so we know Wordpress will put the images into the path 1970/01. This will +# be relied on in the next script. +images.each do |path| + puts <<-XML2 + + #{File.basename(path)} + https://blog.fabianonline.de#{path} + Thu, 01 Jan 1970 12:41:29 +0000 + + https://blog.fabianonline.de#{path} + + + + + + + + + + + 0 + 0 + + + 0 + + + + + + +XML2 +end + +puts " + + +" diff --git a/finalize_export.rb b/finalize_export.rb new file mode 100644 index 0000000..0db3864 --- /dev/null +++ b/finalize_export.rb @@ -0,0 +1,14 @@ +data = STDIN.read + +# Replace the image paths to be accessible in Wordpress. +# The new path given here is the path for my case: A multi-site blog. The 1970/01 +# in the path was determined by setting a date during extract_media.rb. +# Upload a file to your media gallery and look at the image url to determine +# your correct path. 
+images = data.scan(/(?:src|href)="(\/uploads\/.+?)"/).map(&:first).uniq + +images.each do |path| + data = data.gsub("=\"#{path}\"", "=\"/wp-content/uploads/sites/5/1970/01/#{File.basename(path)}\"") +end + +puts data.gsub(/([^>])\n/, "\\1 ") diff --git a/wp_export.xml b/wp_export.xml new file mode 100644 index 0000000..60a02ca --- /dev/null +++ b/wp_export.xml @@ -0,0 +1,66 @@ +--- +--- + + + + + fabianonline.de + https://blog.fabianonline.de + Einfach Fabian - ONLINE + Tue, 08 Dec 2020 18:23:29 +0000 + de-DE + 1.2 + https://blog.fabianonline.de + https://blog.fabianonline.de + + + 2 + + + + + + + + https://wordpress.org/?v=5.5.3 + + + {% for post in site.posts reversed %} + + {{ post.title | xml_escape }} + https://blog.fabianonline.de{{ post.url }} + {{post.date | date_to_xmlschema }} + + https://blog.fabianonline.de{{ post.url }} + + {{ post.content | xml_escape }} + + {% increment post_id %} + {{ post.date | date:"%Y-%m-%d %H:%M:%S" }} + {{ post.date | date:"%Y-%m-%d %H:%M:%S" }} + + + {{ post.title | slugify:'latin' }} + + 0 + 0 + + + 0 + {% for tag in post.tags %} + {{tag | xml_escape }} + {% endfor %} + + + + + + {% endfor %} + +