WordPress 数据导入 Blogger
WordPress to Blogger 转换代码,基于 PHP,将 WordPress 后台导出的 XML 数据转换成 Blogger 可直接导入的格式。
<?php
/**
 * WordPress to Blogger Converter (Final Fix for Import Overwrite)
 *
 * Reads a WordPress XML export and writes Atom feed files intended for
 * Blogger's import feature.
 *
 * Key fix in this version: entry IDs are generated as digits only, because
 * (per the original author's note) Blogger may overwrite posts on import when
 * the ID format is malformed.
 *
 * NOTE: the opening tag above must be followed by whitespace (here a newline);
 * written as "<?php/**" it is not recognised as a PHP opening tag.
 */

// Path of the WordPress export file to convert (your source file).
$inputFile = 'wordpress.xml';
// Output files are named <prefix><index>.xml.
$outputPrefix = 'blogger_final_';
// Maximum number of entries written into each output file.
$postsPerFile = 50;
// Abort early when the export file is missing. The message goes to standard
// output as before, but the process now exits with a non-zero status so that
// calling shells and scripts can detect the failure.
if (!is_file($inputFile)) {
    echo "错误: 找不到输入文件 '{$inputFile}'。\n";
    exit(1);
}

echo "正在读取 WordPress 文件...\n";

// Collect libxml parse errors internally instead of emitting PHP warnings, so
// they can be listed in a readable form if parsing fails.
libxml_use_internal_errors(true);
$wpXml = simplexml_load_file($inputFile);
if ($wpXml === false) {
    echo "错误: 无法解析 XML 文件。\n";
    // Show where the document is broken; without this the collected errors
    // would be discarded silently.
    foreach (libxml_get_errors() as $xmlError) {
        echo "  [line {$xmlError->line}] " . trim($xmlError->message) . "\n";
    }
    libxml_clear_errors();
    exit(1);
}
// Namespaces used by the document, including those on descendant nodes,
// keyed by prefix.
$ns = $wpXml->getNamespaces(true);
// Supply default URIs for any prefix the document did not provide; the array
// union keeps every entry that is already present.
$ns += [
    'wp'      => 'http://wordpress.org/export/1.2/',
    'content' => 'http://purl.org/rss/1.0/modules/content/',
    'dc'      => 'http://purl.org/dc/elements/1.1/',
];
// 1. Collect the items worth converting: posts and pages that are neither
//    in the trash nor auto-drafts.
$items = [];
$acceptedTypes = ['post', 'page'];
$skippedStatuses = ['trash', 'auto-draft'];
if (isset($wpXml->channel->item)) {
    foreach ($wpXml->channel->item as $candidate) {
        $meta = $candidate->children($ns['wp']);
        $isAcceptedType = in_array((string)$meta->post_type, $acceptedTypes, true);
        $isSkippedStatus = in_array((string)$meta->status, $skippedStatuses, true);
        if ($isAcceptedType && !$isSkippedStatus) {
            $items[] = $candidate;
        }
    }
}
$totalPosts = count($items);
echo "共找到 {$totalPosts} 篇有效文章。\n";
// 2. Convert the collected items in batches, writing one Atom file per batch.
$chunkedItems = array_chunk($items, $postsPerFile);

// Entry IDs are built by concatenating the run's start timestamp with a
// zero-padded running counter, so they contain digits only and are distinct
// within one run.
$baseId = time();
$globalCounter = 0;

// Layout shared by every date element. The "+00:00" offset is written
// literally, so every value formatted with it must already be in UTC.
$atomDateFormat = 'Y-m-d\TH:i:s\+00:00';
$utc = new DateTimeZone('UTC');

$fileIndex = 1;
foreach ($chunkedItems as $chunk) {
    $dom = new DOMDocument('1.0', 'UTF-8');
    $dom->formatOutput = true;

    // Root <feed> in the Atom namespace, with the app: and gd: prefixes
    // declared on it (the original author marks the gd namespace as required).
    $feed = $dom->createElementNS('http://www.w3.org/2005/Atom', 'feed');
    $feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:app', 'http://purl.org/atom/app#');
    $feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:gd', 'http://schemas.google.com/g/2005');
    $dom->appendChild($feed);

    $generator = $dom->createElement('generator', 'Blogger');
    $generator->setAttribute('version', '7.00');
    $feed->appendChild($generator);

    // gmdate() rather than date(): the format labels the value as +00:00.
    $feed->appendChild($dom->createElement('updated', gmdate($atomDateFormat)));

    foreach ($chunk as $item) {
        $globalCounter++;
        $wpData = $item->children($ns['wp']);
        $contentData = $item->children($ns['content']);
        $dcData = $item->children($ns['dc']);
        $postType = (string)$wpData->post_type;
        $status = (string)$wpData->status;

        $entry = $dom->createElement('entry');

        // ID format: tag:blogger.com,1999:blog-1.post-<digits>.
        // "blog-1" is a placeholder blog ID; per the original author's note
        // only the digits after "post-" need to be unique.
        $pureNumericId = $baseId . sprintf('%04d', $globalCounter);
        $idUri = 'tag:blogger.com,1999:blog-1.post-' . $pureNumericId;
        $entry->appendChild($dom->createElement('id', $idUri));

        // Title; empty titles are replaced by a placeholder.
        $title = (string)$item->title;
        if (trim($title) === '') {
            $title = "Untitled";
        }
        $titleNode = $dom->createElement('title');
        $titleNode->setAttribute('type', 'html');
        $titleNode->appendChild($dom->createTextNode($title));
        $entry->appendChild($titleNode);

        // Content: line breaks become <br /> tags and the HTML is stored in a
        // CDATA section.
        $content = (string)$contentData->encoded;
        if (!empty($content)) {
            $content = nl2br($content);
        }
        $contentNode = $dom->createElement('content');
        $contentNode->setAttribute('type', 'html');
        $contentNode->appendChild($dom->createCDATASection($content));
        $entry->appendChild($contentNode);

        // Dates: prefer the UTC timestamp (wp:post_date_gmt), because the
        // output is labelled +00:00. Fall back to wp:post_date when the GMT
        // value is missing or zeroed (presumably the case for unpublished
        // drafts - confirm against your export); that value is emitted
        // unchanged, as in the original script.
        $rawDate = '';
        $dateCandidates = [
            trim((string)$wpData->post_date_gmt),
            trim((string)$wpData->post_date),
        ];
        foreach ($dateCandidates as $candidateDate) {
            if ($candidateDate !== '' && strpos($candidateDate, '0000-00-00') !== 0) {
                $rawDate = $candidateDate;
                break;
            }
        }
        try {
            // With no usable date the current time is used.
            $dateTime = new DateTime($rawDate === '' ? 'now' : $rawDate, $utc);
            $isoDate = $dateTime->format($atomDateFormat);
        } catch (Exception $e) {
            // Unparseable date string: fall back to the current UTC time.
            $isoDate = gmdate($atomDateFormat);
        }
        $entry->appendChild($dom->createElement('published', $isoDate));
        $entry->appendChild($dom->createElement('updated', $isoDate));

        // Kind marker: page vs. post.
        $typeCategory = $dom->createElement('category');
        $typeCategory->setAttribute('scheme', 'http://schemas.google.com/g/2005#kind');
        $typeCategory->setAttribute(
            'term',
            $postType === 'page'
                ? 'http://schemas.google.com/blogger/2008/kind#page'
                : 'http://schemas.google.com/blogger/2008/kind#post'
        );
        $entry->appendChild($typeCategory);

        // Labels: every non-empty category/tag except "Uncategorized".
        if (isset($item->category)) {
            foreach ($item->category as $cat) {
                $catName = (string)$cat;
                if ($catName !== 'Uncategorized' && trim($catName) !== '') {
                    $tagNode = $dom->createElement('category');
                    $tagNode->setAttribute('scheme', 'http://www.blogger.com/atom/ns#');
                    $tagNode->setAttribute('term', $catName);
                    $entry->appendChild($tagNode);
                }
            }
        }

        // Author. The name goes in through a text node because the value
        // argument of createElement() is not escaped, so a name containing
        // "&" would raise a warning and be truncated.
        $creator = trim((string)$dcData->creator);
        $authorNode = $dom->createElement('author');
        $nameNode = $dom->createElement('name');
        $nameNode->appendChild($dom->createTextNode($creator !== '' ? $creator : 'Admin'));
        $authorNode->appendChild($nameNode);
        $entry->appendChild($authorNode);

        // Anything that is not published is marked as a draft.
        if ($status !== 'publish') {
            $control = $dom->createElementNS('http://purl.org/atom/app#', 'app:control');
            $draft = $dom->createElementNS('http://purl.org/atom/app#', 'app:draft', 'yes');
            $control->appendChild($draft);
            $entry->appendChild($control);
        }

        $feed->appendChild($entry);
    }

    $filename = $outputPrefix . $fileIndex . '.xml';
    // save() returns false on failure; stop instead of reporting a file that
    // was never written.
    if ($dom->save($filename) === false) {
        echo "错误: 无法写入输出文件 '{$filename}'。\n";
        exit(1);
    }
    echo "已生成文件: {$filename} (包含 " . count($chunk) . " 篇文章)\n";
    $fileIndex++;
}

echo "全部完成!请尝试删除 Blogger 中已导入的错误文章,然后重新导入新文件。\n";