Initial commit.
This commit is contained in:
77
extract_media.rb
Normal file
77
extract_media.rb
Normal file
@ -0,0 +1,77 @@
|
||||
# Fixed preamble of the WordPress eXtended RSS (WXR) export: the XML declaration, the <rss> root
# element with its namespaces, the channel metadata, the single author record and the generator tag.
# The <channel> element is deliberately left open here; the items and the closing tags are printed
# further down in this script.
#
# A squiggly heredoc is used so that the source can be indented freely while the XML declaration
# still ends up as the very first bytes of the output.
WXR_HEADER = <<~XML
  <?xml version="1.0" encoding="UTF-8" ?>
  <!-- generator="WordPress/5.5.3" created="2020-12-08 12:42" -->
  <rss version="2.0"
  xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
  xmlns:wfw="http://wellformedweb.org/CommentAPI/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:wp="http://wordpress.org/export/1.2/"
  >

  <channel>
  <title>fabianonline.de</title>
  <link>https://blog.fabianonline.de</link>
  <description>Eine weitere Wordpress-Test Websites Website</description>
  <pubDate>Tue, 08 Dec 2020 12:42:20 +0000</pubDate>
  <language>de-DE</language>
  <wp:wxr_version>1.2</wp:wxr_version>
  <wp:base_site_url>https://blog2.fabianonline.de</wp:base_site_url>
  <wp:base_blog_url>https://blog2.fabianonline.de</wp:base_blog_url>

  <wp:author>
  <wp:author_id>2</wp:author_id>
  <wp:author_login><![CDATA[fabian]]></wp:author_login>
  <wp:author_email><![CDATA[mail@fabianonline.de]]></wp:author_email>
  <wp:author_display_name><![CDATA[fabian]]></wp:author_display_name>
  <wp:author_first_name><![CDATA[]]></wp:author_first_name>
  <wp:author_last_name><![CDATA[]]></wp:author_last_name>
  </wp:author>

  <generator>https://wordpress.org/?v=5.5.3</generator>
XML

puts WXR_HEADER
|
||||
|
||||
# Base URL that is prepended to every upload path to build the item's link, guid and attachment_url.
SITE_URL = 'https://blog.fabianonline.de'

# Matches src="/uploads/..." and href="/uploads/..." attributes and captures the attribute value.
# You might want to change this if your images are saved at another place.
UPLOAD_REFERENCE = %r{(?:src|href)="(/uploads/.+?)"}

# Collects every upload path referenced in the given markup.
#
# @param html [String] the text to search (the script passes everything read from stdin)
# @return [Array<String>] the captured paths without duplicates, in order of first appearance
def extract_upload_paths(html)
  # scan yields one single-element array per match because the pattern has exactly one group.
  html.scan(UPLOAD_REFERENCE).flatten.uniq
end

# Builds the WXR <item> element that describes one image as a WordPress attachment.
#
# Pay close attention to the attachment_url: this is the path where Wordpress will try to load the
# image from. It should be correct and accessible for Wordpress.
# The date is given as Jan 1, 1970 so we know Wordpress will put the images into the path 1970/01.
# This will be relied on in the next script.
#
# NOTE(review): +path+ is interpolated as-is, without any XML escaping - confirm that the upload
# paths never contain characters such as "&", "<" or the sequence "]]>".
#
# @param path [String] upload path as captured from the markup, e.g. "/uploads/foo.png"
# @return [String] the complete <item> element, terminated by a newline
def attachment_item(path)
  file_name = File.basename(path)
  url = "#{SITE_URL}#{path}"

  # 1970-01-01 was a Thursday; the weekday has to agree with the date to form a valid RFC 822 date.
  <<~ITEM
    <item>
    <title>#{file_name}</title>
    <link>#{url}</link>
    <pubDate>Thu, 01 Jan 1970 12:41:29 +0000</pubDate>
    <dc:creator><![CDATA[fabian]]></dc:creator>
    <guid isPermaLink="false">#{url}</guid>
    <description></description>
    <content:encoded><![CDATA[]]></content:encoded>
    <excerpt:encoded><![CDATA[]]></excerpt:encoded>
    <wp:post_id></wp:post_id>
    <wp:post_date><![CDATA[1970-01-01 13:41:29]]></wp:post_date>
    <wp:post_date_gmt><![CDATA[1970-01-01 12:41:29]]></wp:post_date_gmt>
    <wp:comment_status><![CDATA[open]]></wp:comment_status>
    <wp:ping_status><![CDATA[closed]]></wp:ping_status>
    <wp:post_name><![CDATA[#{file_name}]]></wp:post_name>
    <wp:status><![CDATA[inherit]]></wp:status>
    <wp:post_parent>0</wp:post_parent>
    <wp:menu_order>0</wp:menu_order>
    <wp:post_type><![CDATA[attachment]]></wp:post_type>
    <wp:post_password><![CDATA[]]></wp:post_password>
    <wp:is_sticky>0</wp:is_sticky>
    <wp:attachment_url><![CDATA[#{url}]]></wp:attachment_url>
    <wp:postmeta>
    <wp:meta_key><![CDATA[_wp_attached_file]]></wp:meta_key>
    <wp:meta_value><![CDATA[#{path}]]></wp:meta_value>
    </wp:postmeta>
    </item>
  ITEM
end

# Read the markup from stdin and print one attachment item per distinct upload path.
images = extract_upload_paths($stdin.read)
images.each { |path| puts attachment_item(path) }
|
||||
|
||||
# Closing part of the export: an empty separator line followed by the end tags of the <channel>
# and <rss> elements. Written as a heredoc to match the other XML fragments in this script.
WXR_FOOTER = <<~XML

  </channel>
  </rss>
XML

puts WXR_FOOTER
|
Reference in New Issue
Block a user