Initial commit.

2020-12-08 19:37:51 +01:00
commit dc4bdeb43e
4 changed files with 191 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,34 @@
+# Convert your Jekyll blog to Wordpress
+
+This is a small collection of scripts to help you move
+your blog from Jekyll to Wordpress.
+
+This is pretty dirty code and heavily tailored toward
+my own needs, so your mileage may vary. But perhaps
+you'll find it at least a bit helpful.
+
+## Features
+This will help you import:
+* Posts
+* Tags
+* Images
+
+## Usage
+
+1. Read the scripts and modify them as needed. Change
+the username, for example. ;-) There are quite a few
+things in there you'll want to change.
+2. Copy wp_export.xml to your Jekyll data folder and
+run `jekyll build`. This will create an export of all
+your posts in `_site/wp_export.xml`. Do not import this
+file in Wordpress yet!
+3. Run extract_media.rb to extract all image URLs used
+in the export:
+`ruby extract_media.rb < _site/wp_export.xml > wp_export.images.xml`.
+4. Run finalize_export.rb to modify the image links:
+`ruby finalize_export.rb < _site/wp_export.xml > wp_export.posts.xml`.
+5. Use the Wordpress import plugin to import `wp_export.posts.xml`
+(do not activate the `import attachments` option).
+6. Use the Wordpress import plugin to import `wp_export.images.xml`,
+this time activating the option to `import attachments`.
+
--- a/extract_media.rb
+++ b/extract_media.rb
@ -0,0 +1,77 @@
+puts <<-XML
+<?xml version="1.0" encoding="UTF-8" ?>
+<!-- generator="WordPress/5.5.3" created="2020-12-08 12:42" -->
+<rss version="2.0"
+	xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
+	xmlns:content="http://purl.org/rss/1.0/modules/content/"
+	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
+	xmlns:dc="http://purl.org/dc/elements/1.1/"
+	xmlns:wp="http://wordpress.org/export/1.2/"
+>
+
+<channel>
+	<title>fabianonline.de</title>
+	<link>https://blog.fabianonline.de</link>
+	<description>Eine weitere Wordpress-Test Websites Website</description>
+	<pubDate>Tue, 08 Dec 2020 12:42:20 +0000</pubDate>
+	<language>de-DE</language>
+	<wp:wxr_version>1.2</wp:wxr_version>
+	<wp:base_site_url>https://blog2.fabianonline.de</wp:base_site_url>
+	<wp:base_blog_url>https://blog2.fabianonline.de</wp:base_blog_url>
+
+	<wp:author>
+		<wp:author_id>2</wp:author_id>
+		<wp:author_login><![CDATA[fabian]]></wp:author_login>
+		<wp:author_email><![CDATA[mail@fabianonline.de]]></wp:author_email>
+		<wp:author_display_name><![CDATA[fabian]]></wp:author_display_name>
+		<wp:author_first_name><![CDATA[]]></wp:author_first_name>
+		<wp:author_last_name><![CDATA[]]></wp:author_last_name>
+	</wp:author>
+				
+	<generator>https://wordpress.org/?v=5.5.3</generator>
+XML
+
+# Look at all strings starting with src="/uploads/ or href="/uploads/ - you might want to change this if
+# your images are saved at another place.
+images = STDIN.read.scan(/(?:src|href)=&quot;(\/uploads\/.+?)&quot;/).to_a.map(&:first).uniq
+
+# Pay close attention to the attachment_url in this block: This is the path where Wordpress will try to load
+# the image from. This should be correct and accessible for wordpress.
+# The date is given as Jan 1, 1970 so we know Wordpress will put the images into the path 1970/01. This will
+# be relied on in the next script.
+images.each_with_index do |path, i|
+	puts <<-XML2 
+	<item>
+		<title>#{File.basename(path)}</title>
+		<link>https://blog.fabianonline.de#{path}</link>
+		<pubDate>Tue, 01 Jan 1970 12:41:29 +0000</pubDate>
+		<dc:creator><![CDATA[fabian]]></dc:creator>
+		<guid isPermaLink="false">https://blog.fabianonline.de#{path}</guid>
+		<description></description>
+		<content:encoded><![CDATA[]]></content:encoded>
+		<excerpt:encoded><![CDATA[]]></excerpt:encoded>
+		<wp:post_id></wp:post_id>
+		<wp:post_date><![CDATA[1970-01-01 13:41:29]]></wp:post_date>
+		<wp:post_date_gmt><![CDATA[1970-01-01 12:41:29]]></wp:post_date_gmt>
+		<wp:comment_status><![CDATA[open]]></wp:comment_status>
+		<wp:ping_status><![CDATA[closed]]></wp:ping_status>
+		<wp:post_name><![CDATA[#{File.basename(path)}]]></wp:post_name>
+		<wp:status><![CDATA[inherit]]></wp:status>
+		<wp:post_parent>0</wp:post_parent>
+		<wp:menu_order>0</wp:menu_order>
+		<wp:post_type><![CDATA[attachment]]></wp:post_type>
+		<wp:post_password><![CDATA[]]></wp:post_password>
+		<wp:is_sticky>0</wp:is_sticky>
+		<wp:attachment_url><![CDATA[https://blog.fabianonline.de#{path}]]></wp:attachment_url>
+		<wp:postmeta>
+			<wp:meta_key><![CDATA[_wp_attached_file]]></wp:meta_key>
+			<wp:meta_value><![CDATA[#{path}]]></wp:meta_value>
+		</wp:postmeta>
+	</item>
+XML2
+end
+
+puts "
+</channel>
+</rss>
+"
--- a/finalize_export.rb
+++ b/finalize_export.rb
@ -0,0 +1,14 @@
+data = STDIN.read
+
+# Replace the image paths to be accessible in Wordpress.
+# The new path given here is the path for my case: A multi-site blog. The 1970/01
+# in the path was determined by setting a date during extract_media.rb.
+# Upload a file to your media gallery and look at the image url to determine
+# your correct path.
+images = data.scan(/(?:src|href)=&quot;(\/uploads\/.+?)&quot;/).to_a.map(&:first).uniq
+
+images.each do |path|
+	data = data.gsub("=&quot;#{path}&quot;", "=&quot;/wp-content/uploads/sites/5/1970/01/#{File.basename(path)}&quot;")
+end
+
+puts data.gsub(/([^>])\n/, "\\1 ")
--- a/wp_export.xml
+++ b/wp_export.xml
@ -0,0 +1,66 @@
+---
+---
+<?xml version="1.0" encoding="utf-8"?>
+<!-- generator="WordPress/5.5.3" created="2020-12-08 09:23" -->
+<!-- Jekyll template: the empty front matter at the top lets Jekyll run this file through Liquid, which emits one item per entry in site.posts. -->
+<rss version="2.0"
+	xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
+	xmlns:content="http://purl.org/rss/1.0/modules/content/"
+	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
+	xmlns:dc="http://purl.org/dc/elements/1.1/"
+	xmlns:wp="http://wordpress.org/export/1.2/"
+>
+<channel>
+	<title>fabianonline.de</title>
+	<link>https://blog.fabianonline.de</link>
+	<description>Einfach Fabian - ONLINE</description>
+	<pubDate>Tue, 08 Dec 2020 18:23:29 +0000</pubDate>
+	<language>de-DE</language>
+	<wp:wxr_version>1.2</wp:wxr_version>
+	<wp:base_site_url>https://blog.fabianonline.de</wp:base_site_url>
+	<wp:base_blog_url>https://blog.fabianonline.de</wp:base_blog_url>
+
+	<wp:author>
+		<wp:author_id>2</wp:author_id>
+		<wp:author_login><![CDATA[fabian]]></wp:author_login>
+		<wp:author_email><![CDATA[mail@fabianonline.de]]></wp:author_email>
+		<wp:author_display_name><![CDATA[Fabian]]></wp:author_display_name>
+		<wp:author_first_name><![CDATA[Fabian]]></wp:author_first_name>
+		<wp:author_last_name><![CDATA[Schlenz]]></wp:author_last_name>
+	</wp:author>
+				
+	<generator>https://wordpress.org/?v=5.5.3</generator>
+
+	<!-- Counter warm-up, not content: the four tags in this comment use up the counter values 0 to 3, so the first post below gets wp:post_id 4. {% increment post_id %} {% increment post_id %} {% increment post_id %} {% increment post_id %} -->
+	<!-- NOTE(review): wp:post_date and wp:post_date_gmt below are rendered from the same expression, so the GMT value is only right if post dates are already in UTC. TODO confirm. -->
+	{% for post in site.posts reversed %}
+	<item>
+		<title>{{ post.title | xml_escape }}</title>
+		<link>https://blog.fabianonline.de{{ post.url }}</link>
+		<pubDate>{{post.date | date_to_xmlschema }}</pubDate>
+		<dc:creator><![CDATA[fabian]]></dc:creator>
+		<guid isPermaLink="false">https://blog.fabianonline.de{{ post.url }}</guid>
+		<description></description>
+		<content:encoded>{{ post.content | xml_escape }}</content:encoded>
+		<excerpt:encoded><![CDATA[]]></excerpt:encoded>
+		<wp:post_id>{% increment post_id %}</wp:post_id>
+		<wp:post_date>{{ post.date | date:"%Y-%m-%d %H:%M:%S" }}</wp:post_date>
+		<wp:post_date_gmt>{{ post.date | date:"%Y-%m-%d %H:%M:%S" }}</wp:post_date_gmt>
+		<wp:comment_status><![CDATA[open]]></wp:comment_status>
+		<wp:ping_status><![CDATA[open]]></wp:ping_status>
+		<wp:post_name>{{ post.title | slugify:'latin' }}</wp:post_name>
+		<wp:status><![CDATA[publish]]></wp:status>
+		<wp:post_parent>0</wp:post_parent>
+		<wp:menu_order>0</wp:menu_order>
+		<wp:post_type><![CDATA[post]]></wp:post_type>
+		<wp:post_password><![CDATA[]]></wp:post_password>
+		<wp:is_sticky>0</wp:is_sticky>
+		{% for tag in post.tags %}
+			<category domain="post_tag" nicename="{{ tag | slugify:'latin' }}">{{tag | xml_escape }}</category>
+		{% endfor %}
+		<wp:postmeta>
+			<wp:meta_key><![CDATA[_edit_last]]></wp:meta_key>
+			<wp:meta_value><![CDATA[2]]></wp:meta_value>
+		</wp:postmeta>
+	</item>
+	{% endfor %}
+</channel>
+</rss>