WordPress to Blogger 转换代码,基于 PHP:将 WordPress 后台导出的 XML 数据转换成 Blogger 可直接导入的格式。

<?php

/**
 * WordPress to Blogger converter.
 *
 * Reads a WordPress export file (WXR), keeps the posts and pages that are
 * worth migrating, and leaves them in $items for the conversion step that
 * follows in this script.
 *
 * Failures print a message and terminate with exit status 1 so that shell
 * callers can detect them.
 */

$inputFile = 'wordpress.xml';     // source file exported from WordPress
$outputPrefix = 'blogger_final_'; // output files are named <prefix><n>.xml
$postsPerFile = 50;               // maximum number of entries per output file

if (!file_exists($inputFile)) {
    echo "错误: 找不到输入文件 '{$inputFile}'。\n";
    exit(1);
}

echo "正在读取 WordPress 文件...\n";

// Collect libxml problems instead of letting them surface as PHP warnings;
// they are printed below if parsing fails.
libxml_use_internal_errors(true);
$wpXml = simplexml_load_file($inputFile);

if ($wpXml === false) {
    echo "错误: 无法解析 XML 文件。\n";
    foreach (libxml_get_errors() as $xmlError) {
        echo sprintf("  第 %d 行: %s\n", $xmlError->line, trim($xmlError->message));
    }
    libxml_clear_errors();
    exit(1);
}

// Namespace URIs as declared by the document itself.
$ns = $wpXml->getNamespaces(true);
// Fall back to well-known URIs for any prefix the document did not declare.
$fallbackNamespaces = [
    'wp'      => 'http://wordpress.org/export/1.2/',
    'content' => 'http://purl.org/rss/1.0/modules/content/',
    'dc'      => 'http://purl.org/dc/elements/1.1/',
];
foreach ($fallbackNamespaces as $prefix => $uri) {
    if (!isset($ns[$prefix])) {
        $ns[$prefix] = $uri;
    }
}

$items = [];

// 1. Keep only posts and pages that are neither trashed nor auto-drafts.
$wantedTypes = ['post', 'page'];
$ignoredStatuses = ['trash', 'auto-draft'];

if (isset($wpXml->channel->item)) {
    foreach ($wpXml->channel->item as $item) {
        $wpData = $item->children($ns['wp']);
        $postType = (string)$wpData->post_type;
        $status = (string)$wpData->status;

        if (!in_array($postType, $wantedTypes, true)) {
            continue;
        }
        if (in_array($status, $ignoredStatuses, true)) {
            continue;
        }

        $items[] = $item;
    }
}

$totalPosts = count($items);
echo "共找到 {$totalPosts} 篇有效文章。\n";

// 2. Convert the collected items in batches, writing one Atom file per batch.
$chunkedItems = array_chunk($items, $postsPerFile);

// Unix timestamp taken once per run; it prefixes every generated post ID.
$baseId = time();
$globalCounter = 0;

$utc = new DateTimeZone('UTC');

/**
 * Appends <$name>$text</$name> to $parent and returns the new element.
 *
 * The text goes in through a text node because the value argument of
 * DOMDocument::createElement() is not escaped: a bare "&" there triggers a
 * warning and truncates the value.
 */
$appendTextElement = function (DOMNode $parent, $name, $text) {
    $doc = $parent->ownerDocument;
    $node = $doc->createElement($name);
    $node->appendChild($doc->createTextNode($text));
    return $parent->appendChild($node);
};

/**
 * Converts a WordPress date string to "Y-m-d\TH:i:s+00:00".
 *
 * Returns null for an empty value, for the all-zero placeholder date
 * ("0000-00-00 ...") and for anything DateTime cannot parse. A string
 * without its own timezone is read as UTC, so its digits are kept as-is.
 */
$toUtcTimestamp = function ($raw) use ($utc) {
    $raw = trim((string)$raw);
    if ($raw === '' || strncmp($raw, '0000-00-00', 10) === 0) {
        return null;
    }
    try {
        $parsed = new DateTime($raw, $utc);
    } catch (Exception $e) {
        return null;
    }
    return $parsed->setTimezone($utc)->format('Y-m-d\TH:i:sP');
};

foreach ($chunkedItems as $chunkOffset => $chunk) {
    $fileIndex = $chunkOffset + 1;

    $dom = new DOMDocument('1.0', 'UTF-8');
    $dom->formatOutput = true;

    // Root <feed> in the Atom namespace, with the app: and gd: prefixes
    // declared up front (the original author marked gd: as essential).
    $feed = $dom->createElementNS('http://www.w3.org/2005/Atom', 'feed');
    $feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:app', 'http://purl.org/atom/app#');
    $feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:gd', 'http://schemas.google.com/g/2005');
    $dom->appendChild($feed);

    $generator = $appendTextElement($feed, 'generator', 'Blogger');
    $generator->setAttribute('version', '7.00');

    // gmdate() so the value really is UTC, matching the +00:00 offset.
    $appendTextElement($feed, 'updated', gmdate('Y-m-d\TH:i:sP'));

    foreach ($chunk as $item) {
        $globalCounter++;

        $wpData = $item->children($ns['wp']);
        $contentData = $item->children($ns['content']);
        $dcData = $item->children($ns['dc']);

        $postType = (string)$wpData->post_type;
        $status = (string)$wpData->status;

        $entry = $dom->createElement('entry');

        // Purely numeric post ID (the original author's fix for Blogger
        // overwriting posts on import). The digits are the run timestamp
        // followed by the zero-padded running counter; this is string
        // concatenation, not addition, e.g. 1717234567 . 0001.
        // "blog-1" is a placeholder blog ID.
        $pureNumericId = $baseId . sprintf('%04d', $globalCounter);
        $appendTextElement($entry, 'id', 'tag:blogger.com,1999:blog-1.post-' . $pureNumericId);

        // Title, with a placeholder for untitled items.
        $title = (string)$item->title;
        if (trim($title) === '') {
            $title = "Untitled";
        }
        $titleNode = $appendTextElement($entry, 'title', $title);
        $titleNode->setAttribute('type', 'html');

        // Body: newlines become <br />, wrapped in CDATA to keep the HTML intact.
        $content = nl2br((string)$contentData->encoded);
        $contentNode = $dom->createElement('content');
        $contentNode->setAttribute('type', 'html');
        $contentNode->appendChild($dom->createCDATASection($content));
        $entry->appendChild($contentNode);

        // Timestamps: prefer the GMT date, since the output is labelled
        // +00:00. Fall back to post_date, then to the current UTC time.
        $isoDate = $toUtcTimestamp($wpData->post_date_gmt);
        if ($isoDate === null) {
            $isoDate = $toUtcTimestamp($wpData->post_date);
        }
        if ($isoDate === null) {
            $isoDate = gmdate('Y-m-d\TH:i:sP');
        }
        $appendTextElement($entry, 'published', $isoDate);
        $appendTextElement($entry, 'updated', $isoDate);

        // Kind marker: page vs. post.
        $kindTerm = ($postType === 'page')
            ? 'http://schemas.google.com/blogger/2008/kind#page'
            : 'http://schemas.google.com/blogger/2008/kind#post';
        $typeCategory = $dom->createElement('category');
        $typeCategory->setAttribute('scheme', 'http://schemas.google.com/g/2005#kind');
        $typeCategory->setAttribute('term', $kindTerm);
        $entry->appendChild($typeCategory);

        // Labels: every <category> of the item, minus "Uncategorized",
        // blanks and repeats of a term already written for this entry.
        $seenTerms = [];
        foreach ($item->category as $cat) {
            $catName = (string)$cat;
            if ($catName === 'Uncategorized' || trim($catName) === '' || isset($seenTerms[$catName])) {
                continue;
            }
            $seenTerms[$catName] = true;

            $tagNode = $dom->createElement('category');
            $tagNode->setAttribute('scheme', 'http://www.blogger.com/atom/ns#');
            $tagNode->setAttribute('term', $catName);
            $entry->appendChild($tagNode);
        }

        // Author, defaulting to "Admin" when dc:creator is empty.
        $creator = (string)$dcData->creator;
        $authorNode = $dom->createElement('author');
        $appendTextElement($authorNode, 'name', $creator !== '' ? $creator : 'Admin');
        $entry->appendChild($authorNode);

        // Anything that is not published is marked as a draft.
        if ($status !== 'publish') {
            $control = $dom->createElementNS('http://purl.org/atom/app#', 'app:control');
            $draft = $dom->createElementNS('http://purl.org/atom/app#', 'app:draft', 'yes');
            $control->appendChild($draft);
            $entry->appendChild($control);
        }

        $feed->appendChild($entry);
    }

    $filename = $outputPrefix . $fileIndex . '.xml';
    // save() returns false on failure; stop rather than report success.
    if ($dom->save($filename) === false) {
        echo "错误: 无法写入文件 '{$filename}'。\n";
        exit(1);
    }
    echo "已生成文件: {$filename} (包含 " . count($chunk) . " 篇文章)\n";
}

echo "全部完成!请尝试删除 Blogger 中已导入的错误文章,然后重新导入新文件。\n";
?>