After writing several posts about how to import comments from Disqus into WordPress, I received many emails asking for the complete (copy-paste-ready) script to do this. So I decided to share it, in the hope that it helps and that the community can keep improving it.

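As usual, don’t forget to back up your WordPress comments database before trying this script, and fill in the database information (hostname, database name, username & password). Also replace the XML file name and the `vtm7487_` table prefix used in the queries with your own.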

So here is the code:

<?php
//    Main import script: reads a Disqus XML export, converts every <post>
//    element into a WordPress comment row and inserts it into the database,
//    then prints simple timing statistics.

//    start the overall timer
$start = microtime(true);

//    get content of file and parse the xml
$xml = simplexml_load_file('yourmxlfilesource.xml');

if ($xml === false) {
    //    every later step needs the parsed export, so stop right away
    die('Unable to load or parse the XML file');
}

//    initiate database connection

$database_info['hostname'] = ""; //    database hostname

$database_info['database'] = ""; //    database name

$database_info['username'] = ""; //    database username

$database_info['password'] = ""; //    database password

$database_connect = mysql_pconnect($database_info['hostname'], $database_info['username'], $database_info['password']) or trigger_error(mysql_error(), E_USER_ERROR);

mysql_select_db($database_info['database'], $database_connect) or trigger_error(mysql_error($database_connect), E_USER_ERROR);

$i = 0;

//    get all the comment from xml file
$comments = get_post();

//    get all the post title array from xml file (read by get_post_id() through a global)
$post_title = get_post_title_array();

$comment_result = array();

//    per-comment processing time, in seconds
$duration = array();

//    create loop to convert from xml comment into wordpress-format comment
foreach ($comments as $comment) {

    $start_sub = microtime(true);

    //    integer columns get real integers; an empty string is rejected by MySQL in strict mode
    $comment_result['comment_post_ID'] = (int) get_post_id($comment->thread->attributes('dsq', true)->id);

    //    insert_comment() embeds the values in the query as-is, so every
    //    value that comes from the export is escaped here
    $comment_result['comment_author'] = mysql_real_escape_string((string) $comment->author->name, $database_connect);

    $comment_result['comment_author_email'] = mysql_real_escape_string((string) $comment->author->email, $database_connect);

    $comment_result['comment_author_url'] = '';

    $comment_result['comment_author_IP'] = mysql_real_escape_string((string) $comment->ipAddress, $database_connect);

    //    NOTE(review): the same timestamp is stored as local and GMT date;
    //    confirm this matches the time zone of the target blog
    $comment_date = mysql_real_escape_string(sanitize_date((string) $comment->createdAt), $database_connect);

    $comment_result['comment_date'] = $comment_date;

    $comment_result['comment_date_gmt'] = $comment_date;

    //    strip the unwanted tags first, escape last: escaping must be the
    //    final transformation before the value reaches the query
    $comment_result['comment_content'] = mysql_real_escape_string(strip_tags((string) $comment->message, '<br><img><a>'), $database_connect);

    $comment_result['comment_karma'] = 1;

    //    check if comment is spam, deleted or approved
    if ((string) $comment->isSpam === 'true') {
        $comment_approved = 'spam';
    } else if ((string) $comment->isDeleted === 'true') {
        $comment_approved = 'trash';
    } else {
        $comment_approved = 1;
    }
    $comment_result['comment_approved'] = $comment_approved;

    $comment_result['comment_agent'] = '';

    $comment_result['comment_type'] = '';

    $comment_result['comment_parent'] = 0;

    $comment_result['user_id'] = 0;

    //    insert the wordpress format comment into wp database
    insert_comment($comment_result);

    $duration[$i] = microtime(true) - $start_sub;
    $i++;

}

//    the statistics only make sense when at least one comment was processed
//    (max()/min() and the division below fail on an empty list)
if (count($duration) > 0) {

    echo 'max duration : ' . max($duration) . '<br/>';

    echo 'min duration : ' . min($duration) . '<br/>';

    echo 'average duration : ' . (array_sum($duration) / count($duration)) . '<br/>';

}

//    show the total duration of process

echo 'total duration : ' . (microtime(true) - $start);

/////////    define function here

/**
 * Insert one WordPress-format comment row into the comments table.
 *
 * Column names come from the array keys and values from the array values.
 * Each value is wrapped in double quotes but is NOT escaped here, so the
 * caller must pass values that are already safe to embed in SQL.
 * The script stops with the MySQL error message when the query fails.
 *
 * @param array $comment map of column name => value
 * @return void
 */
function insert_comment($comment)
{
    global $database_connect;

    $columns = array();
    $quoted  = array();

    //    create the column list and the value list of the insert query
    foreach ($comment as $key => $value) {
        $columns[] = '`' . $key . '`';
        $quoted[]  = '"' . $value . '"';
    }

    $field  = implode(',', $columns);
    $values = implode(',', $quoted);

    //    insert the comment into the database
    //    (the table name carries the prefix of the author's installation)
    $query = "INSERT INTO `vtm7487_comments` ($field) VALUES ($values)";

    mysql_query($query, $database_connect) or die(mysql_error());
}

/**
 * Convert a timestamp such as "2012-03-04T05:06:07Z" into the
 * "2012-03-04 05:06:07" form.
 *
 * The "T" separator becomes a space and the "Z" suffix is removed; the
 * result is trimmed so no stray whitespace is left around the value.
 *
 * @param mixed $date timestamp, anything that can be cast to a string
 * @return string the cleaned-up timestamp
 */
function sanitize_date($date)
{
    //  remove the additional string from the date
    $date = str_replace('T', ' ', (string) $date);

    $date = str_replace('Z', '', $date);

    return trim($date);
}

/**
 * Resolve a Disqus thread id to the ID of the matching WordPress post.
 *
 * The thread title is looked up with find_thread(), reduced to its last
 * "/"-separated segment, "-" is turned into a space and ".html" is removed.
 * The closest entry of the global $post_title list (Levenshtein distance,
 * see get_closest_post_title()) is then searched in the posts table.
 * Results are remembered per thread id, so each thread is resolved once.
 * The script stops with the MySQL error message when the query fails.
 *
 * @param mixed $thread Disqus thread id (string, int or SimpleXMLElement)
 * @return string|null post ID as returned by MySQL, or null when no row matches
 */
function get_post_id($thread)
{
    global $post_title, $database_connect;

    //  many comments share one thread: resolve each thread only once
    static $resolved = array();

    $cache_key = (string) $thread;
    if (array_key_exists($cache_key, $resolved)) {
        return $resolved[$cache_key];
    }

    //  get wordpress post id from disqus thread id
    $thread_title = (string) find_thread('id', $thread, 'title'); //  get the title of the post
    $segments     = explode('/', $thread_title);

    $thread_title = $segments[count($segments) - 1];

    $thread_title       = str_replace('-', ' ', $thread_title);
    $thread_title       = str_replace('.html', '', $thread_title);
    $post_title_closest = get_closest_post_title($thread_title, $post_title);

    //  get the wordpress post id from the title of the post
    //  (titles may contain quotes, so the value is escaped before use)
    $escaped_title = mysql_real_escape_string((string) $post_title_closest, $database_connect);
    $query         = "SELECT `ID` FROM `vtm7487_posts` WHERE `post_title` = '$escaped_title' LIMIT 1";

    $query_result = mysql_query($query, $database_connect) or die(mysql_error());
    $query_result_row = mysql_fetch_assoc($query_result);

    //  mysql_fetch_assoc() returns false when no row matched
    $post_id = $query_result_row ? $query_result_row['ID'] : null;

    $resolved[$cache_key] = $post_id;

    return $post_id;
}

/**
 * Return the entry of $words with the smallest Levenshtein distance to $input.
 *
 * An exact match (distance 0) is returned immediately. When several words
 * share the smallest distance, the last one of them wins.
 *
 * @param string $input text to match
 * @param array  $words candidate strings
 * @return string|null the closest candidate, or null when there is no usable candidate
 */
function get_closest_post_title($input, $words)
{
    // stays null when $words is empty or no candidate can be measured
    $closest = null;

    // no shortest distance found, yet
    $shortest = -1;

    // loop through words to find the closest
    foreach ($words as $word) {
        // calculate the distance between the input word and the current word
        $lev = levenshtein((string) $input, (string) $word);

        // levenshtein() reports -1 when it cannot measure the pair (strings
        // over 255 characters before PHP 8); that is not a distance, so the
        // candidate is skipped instead of being treated as the best match
        if ($lev < 0) {
            continue;
        }

        // exact match: nothing can be closer, stop searching
        if ($lev == 0) {
            return $word;
        }

        // first measurable candidate, or at least as close as the best so far
        if ($shortest < 0 || $lev <= $shortest) {
            $closest  = $word;
            $shortest = $lev;
        }
    }

    return $closest;
}

/**
 * Load the distinct post titles from the posts table.
 *
 * The script stops with the MySQL error message when the query fails.
 *
 * @return array list of post titles; empty when the table has no rows
 */
function get_post_title_array()
{
    global $database_connect;

    //  (the table name carries the prefix of the author's installation)
    $query = "SELECT DISTINCT(`post_title`) FROM `vtm7487_posts`";

    $query_result = mysql_query($query, $database_connect) or die(mysql_error());

    //  a plain while loop adds nothing when the result set is empty; a
    //  do/while would store one bogus entry built from the "false" row
    $result = array();
    while ($query_result_row = mysql_fetch_assoc($query_result)) {
        $result[] = $query_result_row['post_title'];
    }

    return $result;
}

/**
 * Find the <thread> element with the given dsq:id in the global $xml
 * document and return one of its child elements.
 *
 * Only top-level elements named "thread" are considered, so another kind of
 * element that happens to carry the same dsq:id is never returned by mistake.
 * Ids are compared as strings; an empty id never matches.
 *
 * @param string $category        not used; matching is always done on the dsq:id attribute
 * @param mixed  $source_value    thread id to look for (string, int or SimpleXMLElement)
 * @param string $return_category name of the child element to return, e.g. 'title'
 * @return SimpleXMLElement|null the requested child element, or null when no thread matches
 */
function find_thread($category, $source_value, $return_category)
{
    //    function to get thread information
    global $xml;

    $wanted_id = trim((string) $source_value);
    if ($wanted_id === '') {
        return null;
    }

    foreach ($xml->children() as $row) {
        if ($row->getName() !== 'thread') {
            continue;
        }

        $attributes = $row->attributes('dsq', true);
        if ($attributes === null) {
            continue;
        }

        if (trim((string) $attributes->id) === $wanted_id) {
            return $row->$return_category;
        }
    }

    return null;
}

/**
 * Collect every top-level <post> element of the global $xml document.
 *
 * @return array list of SimpleXMLElement objects in document order; empty when there is none
 */
function get_post()
{
    //    function to get all post from xml data
    global $xml;

    //    start from an empty list so callers can always iterate the result
    $result = array();

    foreach ($xml->children() as $key => $value) {
        if ($key === 'post') {
            $result[] = $value;
        }
    }

    return $result;
}

?>

 

*Please post a comment if you find any issues using this script; I would really appreciate it. Use it at your own risk.