Initial commit.

This commit is contained in:
Fabian Schlenz 2020-12-08 19:37:51 +01:00
commit dc4bdeb43e
4 changed files with 191 additions and 0 deletions

34
README.md Normal file
View File

@ -0,0 +1,34 @@
# Convert your Jekyll blog to Wordpress
This is a small collection of scripts to help you move
your blog from Jekyll to Wordpress.
This is pretty dirty code and heavily tailored toward
my own needs, so your mileage may vary. But perhaps
you'll find it at least a bit helpful.
## Features
This will help you import:
* Posts
* Tags
* Images
## Usage
1. Read the scripts and modify them as needed. Change
the username, for example. ;-) There are quite a few
things in there you'll want to change.
2. Copy `wp_export.xml` to the root of your Jekyll site (the
source folder, so that Jekyll renders it) and run `jekyll build`.
This will create an export of all your posts in
`_site/wp_export.xml`. Do not import this file in Wordpress yet!
3. Run extract_media.rb to extract all image URLs used
in the export: `ruby extract_media.rb < _site/wp_export.xml
> wp_export.images.xml`.
4. Run finalize_export.rb to modify the image links:
`ruby finalize_export.rb < _site/wp_export.xml > wp_export.posts.xml`.
5. Use the Wordpress import plugin to import `wp_export.posts.xml`
(do not activate the `import attachments` option).
6. Use the Wordpress import plugin to import `wp_export.images.xml`,
this time activating the option to `import attachments`.

77
extract_media.rb Normal file
View File

@ -0,0 +1,77 @@
# Preamble of the generated WXR 1.2 document: XML declaration, RSS namespaces,
# channel metadata and the single author record. All site URLs and author
# details below are hard-coded - adjust them for your own blog before running.
wxr_header = <<-XML
<?xml version="1.0" encoding="UTF-8" ?>
<!-- generator="WordPress/5.5.3" created="2020-12-08 12:42" -->
<rss version="2.0"
xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:wp="http://wordpress.org/export/1.2/"
>
<channel>
<title>fabianonline.de</title>
<link>https://blog.fabianonline.de</link>
<description>Eine weitere Wordpress-Test Websites Website</description>
<pubDate>Tue, 08 Dec 2020 12:42:20 +0000</pubDate>
<language>de-DE</language>
<wp:wxr_version>1.2</wp:wxr_version>
<wp:base_site_url>https://blog2.fabianonline.de</wp:base_site_url>
<wp:base_blog_url>https://blog2.fabianonline.de</wp:base_blog_url>
<wp:author>
<wp:author_id>2</wp:author_id>
<wp:author_login><![CDATA[fabian]]></wp:author_login>
<wp:author_email><![CDATA[mail@fabianonline.de]]></wp:author_email>
<wp:author_display_name><![CDATA[fabian]]></wp:author_display_name>
<wp:author_first_name><![CDATA[]]></wp:author_first_name>
<wp:author_last_name><![CDATA[]]></wp:author_last_name>
</wp:author>
<generator>https://wordpress.org/?v=5.5.3</generator>
XML

# Written first so that the attachment items follow inside <channel>.
puts wxr_header
# Collect every distinct upload path referenced in the export read from STDIN.
# Only attributes of the form src="/uploads/..." or href="/uploads/..." are
# matched, with the quotes appearing as the literal entity &quot; in the input.
# You might want to change this pattern if your images are saved at another place.
# String#scan with a single capture group yields one-element arrays, hence
# map(&:first); uniq drops images that are referenced more than once.
images = STDIN.read.scan(/(?:src|href)=&quot;(\/uploads\/.+?)&quot;/).map(&:first).uniq

# Emit one attachment <item> per image.
# Pay close attention to the attachment_url in this block: This is the path where
# Wordpress will try to load the image from. This should be correct and accessible
# for Wordpress.
# The date is given as Jan 1, 1970 so we know Wordpress will put the images into
# the path 1970/01. This will be relied on in the next script.
images.each do |path|
  # The bare file name serves as both the attachment title and its post_name.
  filename = File.basename(path)
  puts <<-XML2
<item>
<title>#{filename}</title>
<link>https://blog.fabianonline.de#{path}</link>
<pubDate>Tue, 01 Jan 1970 12:41:29 +0000</pubDate>
<dc:creator><![CDATA[fabian]]></dc:creator>
<guid isPermaLink="false">https://blog.fabianonline.de#{path}</guid>
<description></description>
<content:encoded><![CDATA[]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id></wp:post_id>
<wp:post_date><![CDATA[1970-01-01 13:41:29]]></wp:post_date>
<wp:post_date_gmt><![CDATA[1970-01-01 12:41:29]]></wp:post_date_gmt>
<wp:comment_status><![CDATA[open]]></wp:comment_status>
<wp:ping_status><![CDATA[closed]]></wp:ping_status>
<wp:post_name><![CDATA[#{filename}]]></wp:post_name>
<wp:status><![CDATA[inherit]]></wp:status>
<wp:post_parent>0</wp:post_parent>
<wp:menu_order>0</wp:menu_order>
<wp:post_type><![CDATA[attachment]]></wp:post_type>
<wp:post_password><![CDATA[]]></wp:post_password>
<wp:is_sticky>0</wp:is_sticky>
<wp:attachment_url><![CDATA[https://blog.fabianonline.de#{path}]]></wp:attachment_url>
<wp:postmeta>
<wp:meta_key><![CDATA[_wp_attached_file]]></wp:meta_key>
<wp:meta_value><![CDATA[#{path}]]></wp:meta_value>
</wp:postmeta>
</item>
XML2
end
# Close the <channel> and <rss> elements opened by the header; the leading
# empty string produces the blank line that precedes the closing tags.
puts '', '</channel>', '</rss>'

14
finalize_export.rb Normal file
View File

@ -0,0 +1,14 @@
# Reads the export from STDIN, points every /uploads/ image link at its
# Wordpress location and prints the result to STDOUT.
export = STDIN.read

# Distinct /uploads/ paths used in src= or href= attributes (quotes appear as
# the literal entity &quot; in the input).
upload_paths = export.scan(/(?:src|href)=&quot;(\/uploads\/.+?)&quot;/).map(&:first).uniq

# Replace the image paths to be accessible in Wordpress.
# The new path given here is the path for my case: A multi-site blog. The 1970/01
# in the path was determined by setting a date during extract_media.rb.
# Upload a file to your media gallery and look at the image url to determine
# your correct path.
relinked = upload_paths.inject(export) do |xml, path|
  old_attribute = "=&quot;#{path}&quot;"
  new_attribute = "=&quot;/wp-content/uploads/sites/5/1970/01/#{File.basename(path)}&quot;"
  xml.gsub(old_attribute, new_attribute)
end

# Join each line break that does not directly follow a '>' onto the same line,
# replacing the newline with a single space.
puts relinked.gsub(/([^>])\n/, "\\1 ")

66
wp_export.xml Normal file
View File

@ -0,0 +1,66 @@
---
---
<?xml version="1.0" encoding="utf-8"?>
<!-- generator="WordPress/5.5.3" created="2020-12-08 09:23" -->
<rss version="2.0"
xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:wp="http://wordpress.org/export/1.2/"
>
<channel>
<title>fabianonline.de</title>
<link>https://blog.fabianonline.de</link>
<description>Einfach Fabian - ONLINE</description>
<pubDate>Tue, 08 Dec 2020 18:23:29 +0000</pubDate>
<language>de-DE</language>
<wp:wxr_version>1.2</wp:wxr_version>
<wp:base_site_url>https://blog.fabianonline.de</wp:base_site_url>
<wp:base_blog_url>https://blog.fabianonline.de</wp:base_blog_url>
<wp:author>
<wp:author_id>2</wp:author_id>
<wp:author_login><![CDATA[fabian]]></wp:author_login>
<wp:author_email><![CDATA[mail@fabianonline.de]]></wp:author_email>
<wp:author_display_name><![CDATA[Fabian]]></wp:author_display_name>
<wp:author_first_name><![CDATA[Fabian]]></wp:author_first_name>
<wp:author_last_name><![CDATA[Schlenz]]></wp:author_last_name>
</wp:author>
<generator>https://wordpress.org/?v=5.5.3</generator>
<!-- Ignore the numbers in this comment: the post_id counter is advanced four times here so that the posts below do not start at its first values. {% increment post_id %} {% increment post_id %} {% increment post_id %} {% increment post_id %} -->
{% comment %}
  One <item> is rendered per post, iterating site.posts in reverse order.
  The item pubDate uses RFC 822, the date format RSS 2.0 requires and the one
  the channel pubDate above already uses.
  NOTE(review): wp:post_date_gmt is filled with exactly the same value as
  wp:post_date - confirm this is acceptable if your site is not built in UTC.
  The author login, site URL and the _edit_last user id are hard-coded; adjust
  them for your own blog.
{% endcomment %}
{% for post in site.posts reversed %}
<item>
<title>{{ post.title | xml_escape }}</title>
<link>https://blog.fabianonline.de{{ post.url }}</link>
<pubDate>{{ post.date | date_to_rfc822 }}</pubDate>
<dc:creator><![CDATA[fabian]]></dc:creator>
<guid isPermaLink="false">https://blog.fabianonline.de{{ post.url }}</guid>
<description></description>
<content:encoded>{{ post.content | xml_escape }}</content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>{% increment post_id %}</wp:post_id>
<wp:post_date>{{ post.date | date:"%Y-%m-%d %H:%M:%S" }}</wp:post_date>
<wp:post_date_gmt>{{ post.date | date:"%Y-%m-%d %H:%M:%S" }}</wp:post_date_gmt>
<wp:comment_status><![CDATA[open]]></wp:comment_status>
<wp:ping_status><![CDATA[open]]></wp:ping_status>
<wp:post_name>{{ post.title | slugify:'latin' }}</wp:post_name>
<wp:status><![CDATA[publish]]></wp:status>
<wp:post_parent>0</wp:post_parent>
<wp:menu_order>0</wp:menu_order>
<wp:post_type><![CDATA[post]]></wp:post_type>
<wp:post_password><![CDATA[]]></wp:post_password>
<wp:is_sticky>0</wp:is_sticky>
{% for tag in post.tags %}
<category domain="post_tag" nicename="{{ tag | slugify:'latin' }}">{{tag | xml_escape }}</category>
{% endfor %}
<wp:postmeta>
<wp:meta_key><![CDATA[_edit_last]]></wp:meta_key>
<wp:meta_value><![CDATA[2]]></wp:meta_value>
</wp:postmeta>
</item>
{% endfor %}
</channel>
</rss>