简体   繁体   中英

Why are these queries so slow?

I have analyzed 3 million images using libpuzzle . 2 million from my main server and 1 million from another. I would like to combine the information into 1 MySQL database.

I need to take records in test_images_pending database and insert them into test_images but I have to do it in a way where there isn't duplicated data.

test_images has 115 million records total across all tables, words having 110 million by itself. Size ~4.4 GB

test_images_pending has 69 million and 65 million respectfully. Size ~2.6 GB

I have 8GB ram on my computer, and I am willing to load everything (or try) in memory if I have to, to speed things up.

I hoping with some optimizations to my code and or techniques to make MySQL faster I can improve the rate from about 2 pictures per second (from test_images_pending.picture table) to something more manageable. The very least would be something like 100 pictures per second.

Here is the table setup for both test_images and test_images_pending :

--
-- Table structure for table `errors`
--

CREATE TABLE IF NOT EXISTS `errors` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `url` varchar(255) NOT NULL,
  `num` int(11) NOT NULL,
  `pid` bigint(20) unsigned NOT NULL,
  `error` varchar(512) NOT NULL,
  `datetime` datetime NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=MyISAM  DEFAULT CHARSET=latin1 AUTO_INCREMENT=245688 ;

-- --------------------------------------------------------

--
-- Table structure for table `pictures`
--

CREATE TABLE IF NOT EXISTS `pictures` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `digest` char(32) NOT NULL,
  PRIMARY KEY (`id`),
  UNIQUE KEY `idx_digest` (`digest`)
) ENGINE=MyISAM  DEFAULT CHARSET=latin1 AUTO_INCREMENT=1107725 ;

-- --------------------------------------------------------

--
-- Table structure for table `signatures`
--

CREATE TABLE IF NOT EXISTS `signatures` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `compressed_signature` varchar(338) NOT NULL,
  `picture_id` int(11) NOT NULL,
  PRIMARY KEY (`id`),
  KEY `picture_id` (`picture_id`)
) ENGINE=MyISAM  DEFAULT CHARSET=latin1 AUTO_INCREMENT=1107725 ;

-- --------------------------------------------------------

--
-- Table structure for table `stored_pictures`
--

CREATE TABLE IF NOT EXISTS `stored_pictures` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `url` varchar(255) NOT NULL,
  `pid` bigint(20) unsigned NOT NULL,
  `num` int(11) NOT NULL,
  `updated_at` datetime DEFAULT NULL,
  `created_at` datetime DEFAULT NULL,
  `picture_id` int(11) NOT NULL,
  PRIMARY KEY (`id`),
  UNIQUE KEY `idx_url` (`url`),
  KEY `idx_picture_id` (`picture_id`)
) ENGINE=MyISAM  DEFAULT CHARSET=latin1 AUTO_INCREMENT=2773867 ;

-- --------------------------------------------------------

--
-- Table structure for table `words`
--

CREATE TABLE IF NOT EXISTS `words` (
  `pos_and_word` char(5) NOT NULL,
  `signature_id` int(11) NOT NULL,
  KEY `idx_pos_and_word` (`pos_and_word`),
  KEY `signature_id` (`signature_id`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
--

Here is my php PDO code I am running:

<html>
<head>
    <link href="../css/print.css" rel="stylesheet" type="text/css" media="print" /> <!-- siehe screen.css -->
    <link href="../css/screen.css" rel="stylesheet" type="text/css" media="screen, projection" /> 
    <!--[if lte IE 6]><link rel="stylesheet" href="../css/ielte6.css" type="text/css" media="screen" /><![endif]--> 
</head>
<body>
<?php
    ini_set('max_execution_time', 0);

    $dbh = new PDO("mysql:host=127.0.0.1;port=3306;dbname=test_images_pending;charset=utf-8", "root", "");
    $dbh->setAttribute( PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION );
    $dbh->setAttribute(PDO::ATTR_AUTOCOMMIT, FALSE);


    try {
        $query = "select id,digest from test_images_pending.pictures"; 
        $sth = $dbh->prepare($query);
        $sth->execute();

        while ($pending_pictures_rows = $sth->fetch(PDO::FETCH_ASSOC, PDO::FETCH_ORI_NEXT)) {

            // Print out what id it's on.
            print $pending_pictures_rows['id']."<br>";
            buffer_flush();

            try {
                $dbh->beginTransaction(); 

                $query = "SELECT COUNT(id) from test_images.pictures WHERE digest = :digest";
                $sth1 = $dbh->prepare($query);
                $sth1->bindParam(':digest', $pending_pictures_rows['digest']);
                $sth1->execute();

                $count = $sth1->fetchColumn();

                if ($count == 1) {



                    $query = "SELECT id from test_images.pictures WHERE digest = :digest";
                    $sth2 = $dbh->prepare($query);
                    $sth2->bindParam(':digest', $pending_pictures_rows['digest']);
                    $sth2->execute();

                    $correct_pic_id = $sth2->fetchColumn();

                    if(!isset($correct_pic_id) or empty($correct_pic_id)) {
                        throw new PDOException('correct_pic_id was empty');
                    }

                    $query = "select * from test_images_pending.stored_pictures WHERE picture_id = :picture_id"; 
                    $sth3 = $dbh->prepare($query);
                    $sth3->bindParam(':picture_id', $pending_pictures_rows['id']);
                    $sth3->execute();

                    while ($row = $sth3->fetch(PDO::FETCH_ASSOC, PDO::FETCH_ORI_NEXT)) {

                        $query = "INSERT INTO test_images.stored_pictures 
                                    (id, url, pid, num, updated_at, created_at, picture_id) 
                                  VALUES 
                                    (default, :url, :pid, :num, :updated_at, :created_at, :picture_id);";

                        $sth4 = $dbh->prepare($query);
                        $sth4->bindParam(':url', $row['url']);
                        $sth4->bindParam(':pid', $row['pid']);
                        $sth4->bindParam(':num', $row['num']);
                        $sth4->bindParam(':updated_at', $row['updated_at']);
                        $sth4->bindParam(':created_at', $row['created_at']);
                        $sth4->bindParam(':picture_id', $correct_pic_id);
                        $sth4->execute();
                    }

                    $query = "DELETE FROM test_images_pending.stored_pictures WHERE picture_id = :picture_id;";
                    $sth5 = $dbh->prepare($query);
                    $sth5->bindParam(':picture_id', $pending_pictures_rows['id']);
                    $sth5->execute();

                    $query = "select id from test_images_pending.signatures WHERE picture_id = :picture_id;"; 
                    $sth6 = $dbh->prepare($query);
                    $sth6->bindParam(':picture_id', $pending_pictures_rows['id']);
                    $sth6->execute();

                    $signature_id = $sth6->fetchColumn();

                    if(!isset($signature_id) or empty($signature_id)) {
                        throw new PDOException('signature_id was empty');
                    }

                    $query = "DELETE FROM test_images_pending.words WHERE signature_id = :signature_id;"; 
                    $sth7 = $dbh->prepare($query);
                    $sth7->bindParam(':signature_id', $signature_id);
                    $sth7->execute();

                    $query = "DELETE FROM test_images_pending.signatures WHERE picture_id = :picture_id";        
                    $sth8 = $dbh->prepare($query);
                    $sth8->bindParam(':picture_id', $pending_pictures_rows['id']);
                    $sth8->execute();

                    $query = "DELETE FROM test_images_pending.pictures WHERE digest = :digest";                                  
                    $sth9 = $dbh->prepare($query);
                    $sth9->bindParam(':digest', $pending_pictures_rows['digest']);
                    $sth9->execute();
                } else if ($count == 0){



                    $query = "INSERT INTO test_images.pictures
                                (id, digest) 
                              VALUES 
                                (default, :digest);";

                    $sth2 = $dbh->prepare($query);
                    $sth2->bindParam(':digest', $pending_pictures_rows['digest']);
                    $sth2->execute();

                    $new_pic_id = $dbh->lastInsertId();


                    $query = "select * from test_images_pending.stored_pictures WHERE picture_id = :picture_id"; 
                    $sth3 = $dbh->prepare($query);
                    $sth3->bindParam(':picture_id', $pending_pictures_rows['id']);
                    $sth3->execute();

                    while ($row = $sth3->fetch(PDO::FETCH_ASSOC, PDO::FETCH_ORI_NEXT)) {

                        $query = "INSERT INTO test_images.stored_pictures 
                                    (id, url, pid, num, updated_at, created_at, picture_id) 
                                  VALUES 
                                    (default, :url, :pid, :num, :updated_at, :created_at, :picture_id);";

                        $sth4 = $dbh->prepare($query);
                        $sth4->bindParam(':url', $row['url']);
                        $sth4->bindParam(':pid', $row['pid']);
                        $sth4->bindParam(':num', $row['num']);
                        $sth4->bindParam(':updated_at', $row['updated_at']);
                        $sth4->bindParam(':created_at', $row['created_at']);
                        $sth4->bindParam(':picture_id', $new_pic_id);
                        $sth4->execute();
                    }




                    $query = "DELETE FROM test_images_pending.stored_pictures WHERE picture_id = :picture_id;";
                    $sth5 = $dbh->prepare($query);
                    $sth5->bindParam(':picture_id', $pending_pictures_rows['id']);
                    $sth5->execute();

                    $query = "select id,compressed_signature from test_images_pending.signatures WHERE picture_id = :picture_id;"; 
                    $sth6 = $dbh->prepare($query);
                    $sth6->bindParam(':picture_id', $pending_pictures_rows['id']);
                    $sth6->execute();
                    $fetched = $sth6->fetch(PDO::FETCH_ASSOC);

                    $signature_id = $fetched['id'];

                    if(!isset($signature_id) or empty($signature_id)) {
                        print_r($sth6->fetch(PDO::FETCH_ASSOC));
                        throw new PDOException('signature_id was empty');
                    }

                    $compressed_signature = $fetched['compressed_signature'];

                    if(!isset($compressed_signature) or empty($compressed_signature)) {
                        print_r($sth6->fetch(PDO::FETCH_ASSOC));
                        throw new PDOException('compressed_signature was empty');
                    }

                    $query = "INSERT INTO test_images.signatures
                                (id, compressed_signature, picture_id)
                              VALUES
                                (default, :compressed_signature, :picture_id);";

                    $sth7 = $dbh->prepare($query);
                    $sth7->bindParam(':picture_id', $new_pic_id);
                    $sth7->bindParam(':compressed_signature', $compressed_signature);
                    $sth7->execute();

                    $new_sig_id = $dbh->lastInsertId();

                    $query = "SELECT pos_and_word FROM test_images_pending.words WHERE signature_id = :signature_id";  
                    $sth8 = $dbh->prepare($query);
                    $sth8->bindParam(':signature_id', $signature_id);
                    $sth8->execute();

                    while ($row = $sth8->fetch(PDO::FETCH_ASSOC, PDO::FETCH_ORI_NEXT)) {

                        $query = "INSERT INTO test_images.words 
                                    (pos_and_word, signature_id)
                                  VALUES 
                                    (:pos_and_word, :signature_id);";

                        $sth9 = $dbh->prepare($query);
                        $sth9->bindParam(':pos_and_word', $row['pos_and_word']);
                        $sth9->bindParam(':signature_id', $new_sig_id);
                        $sth9->execute();
                    }

                    $query = "DELETE FROM test_images_pending.words WHERE signature_id = :signature_id;"; 
                    $sth10 = $dbh->prepare($query);
                    $sth10->bindParam(':signature_id', $signature_id);
                    $sth10->execute();

                    $query = "DELETE FROM test_images_pending.signatures WHERE picture_id = :picture_id";        
                    $sth11 = $dbh->prepare($query);
                    $sth11->bindParam(':picture_id', $pending_pictures_rows['id']);
                    $sth11->execute();

                    $query = "DELETE FROM test_images_pending.pictures WHERE digest = :digest";                                  
                    $sth12 = $dbh->prepare($query);
                    $sth12->bindParam(':digest', $pending_pictures_rows['digest']);
                    $sth12->execute();


                } else {
                    throw new PDOException("Found more than 1 match for the digest '{$pending_pictures_rows['digest']}' in 'test_images.pictures' ", $query);
                }

                $dbh->commit(); 
            } catch (PDOException $e) {
                $dbh->rollback(); 
                print "<pre>"; print_r($e); print "</pre>"; exit;
            }
        }

        try {

            $dbh->beginTransaction();

            $query = "SELECT * FROM test_images_pending.errors";
            $sth13 = $dbh->prepare($query);
            $sth13->execute();

            while ($row = $sth13->fetch(PDO::FETCH_ASSOC, PDO::FETCH_ORI_NEXT)) {

                $query = "INSERT INTO test_images.errors 
                            (id, url, num, pid, error, datetime)
                          VALUES 
                            (default, :url, :num, :pid, :error, :datetime);";


                $sth14 = $dbh->prepare($query);
                $sth14->bindParam(':url', $row['url']);
                $sth14->bindParam(':num', $row['num']);
                $sth14->bindParam(':pid', $row['pid']);
                $sth14->bindParam(':error', $row['error']);
                $sth14->bindParam(':datetime', $row['datetime']);
                $sth14->execute();
            }

            $query = "DELETE FROM test_images_pending.errors WHERE 1";       
            $sth15 = $dbh->prepare($query);
            $sth15->execute();

            $dbh->commit(); 
        } catch (PDOException $e) {
            $dbh->rollback(); 
            print "<pre>"; print_r($e); print "</pre>"; exit;
        }
    } catch (PDOException $e) {
        print "<pre>"; print_r($e); print "</pre>"; exit;
    }


function buffer_flush(){

    echo str_pad('', 512);
    echo '<!-- -->';

    if(ob_get_length()){

        @ob_flush();
        @flush();
        @ob_end_flush();

    }

    @ob_start();

}
?> 
</body>
</html>

Edit:

Some profiling:

This INSERT gets ran 100 times each non-similar picture (~5 every 6 thus far). It normally takes 0.5 to 0.9 seconds to finish the while loop with an average of 0.007 per INSERT.

$query = "INSERT INTO test_images.words 
        (pos_and_word, signature_id)
        VALUES 
        (:pos_and_word, :signature_id);";

$sth9 = $dbh->prepare($query);
$sth9->bindParam(':pos_and_word', $row['pos_and_word']);
$sth9->bindParam(':signature_id', $new_sig_id);
$sth9->execute();
DELETE FROM test_images_pending.stored_pictures WHERE picture_id = :picture_id;

select * from test_images_pending.stored_pictures WHERE picture_id = :picture_id

DELETE FROM test_images_pending.stored_pictures WHERE picture_id = :picture_id;

all take an average of 0.15 seconds or so per similar picture (~1 out of 6).

Edit 2:

Going by this benchmarking: http://we-love-php.blogspot.com/2012/08/mass-inserts-updates-sqlite-vs-mysql.html

Replacing the slow while loop previously mentioned in Edit 1 with just simple writing to text file such as:

$inserts = array();
while ($row = $sth8->fetch(PDO::FETCH_ASSOC, PDO::FETCH_ORI_NEXT)) {
    $inserts[] = "(".$dbh->quote($row['pos_and_word']).", ".$dbh->quote($new_sig_id).")";
}
$query = "INSERT INTO imvu_images.words (pos_and_word, signature_id) VALUES " . implode(',',$inserts) . ";";            
file_put_contents("inserts.sql", $query."\n", FILE_APPEND);

Makes it al ot faster. Not 100 per second though, more like 10-20. I can then just execute the SQL later on and it runs instantly without delay. (That's why I think there is an issue with my code). The reason why I want 100 per second is because I can analyze images and insert them into 1 database at 30 per second. At this rate, it's faster for me to analyze 2 million images and have it insert each one by one than it is to mass insert the rows. This doesn't seem right, that the server can download 30 images, analyze 30 images, and then do 30 inserts in 1 second yet just doing these various SQL statements cannot even match that.

Edit 3:

Updated my.ini with:

key_buffer_size=4000M
read_buffer_size=32M
read_rnd_buffer_size=200M
bulk_insert_buffer_size=1000M
myisam_max_sort_file_size=10000M
myisam_repair_threads=1
tmp_table_size = 1024M
max_heap_table_size = 1024M
join_buffer_size=8M
sort_buffer_size=8M
max_allowed_packet=32M
max_connect_errors=10
myisam_sort_buffer_size=256M
query_cache_limit=12M
query_cache_size=256M
query_cache_type=1

Which seems to have improved performance 2 fold without using the file_put_contents hack. Still though, 5 records a second isn't cutting it.

The reason this process is so slow is not because the individual queries are slow - in fact, I'm surprised at how fast it's all going - but because you're processing millions of records, one at a time, by looping through each record in your outer resultset. What SQL is good at is processing millions of records all in one go.

There's too much business logic in your code for me to want to re-write the whole thing for you, but I think you want to have a re-write the code along the lines of

INSERT INTO test_images.pictures
      (id, digest) 
SELECT id, digest
from  test_images_pending.pictures
where id not in 
   (select id from test_images.pictures)

Do the same for the other tables. This should run pretty fast - if you've got a good indexing scheme, you'll almost certainly be I/O bound. You should definitely reach more than 2 records per second!

Why can't you use Mysql stored procedures? They execute in Mysql server directly and Faster than Query execution from php. http://dev.mysql.com/doc/refman/5.0/en/create-procedure.html

Call the stored procedure from php like this:

$res = mysql_query('call sp_sel_test()');
if ($res === FALSE) {
    die(mysql_error());
}

You need to set client flags while connecting for using stored procedures with php. Use this: mysql_connect($this->h,$this->u,$this->p,false,65536);

See MySQL Client Flags for more details.

Edit: The main issue was indices on the source tables being INSERTing into. It is recommended to drop any non needed indices before doing mass inserts, then rebuild afterwords.

With a combination of tweaking the mysql settings and the following code, I was able to get the duplicate images (The join portion) to do 50,000 in 30 seconds or so, 25 seconds being just the JOIN operation.

The 2nd part I am using NOT IN and this is where most of the time occurs but it inserts at a rate of 800 records per second, so it exceeds my goal.

I am going to leave the question open for a bit longer to see if it can be optimized more, because I have 39 million records to process.

<html>
<head>
      <link href="../css/print.css" rel="stylesheet" type="text/css" media="print" /> <!-- siehe screen.css -->
    <link href="../css/screen.css" rel="stylesheet" type="text/css" media="screen, projection" /> 
    <!--[if lte IE 6]><link rel="stylesheet" href="../css/ielte6.css" type="text/css" media="screen" /><![endif]--> 
</head>
<body>
 <?php
    ini_set('max_execution_time', 0);
    $benchmark = false;
    $delete = false;
    $dbh = new PDO("mysql:host=127.0.0.1;port=3306;dbname=test_images_pending;charset=utf-8", "root", "");
    $dbh->setAttribute( PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION );
    $dbh->setAttribute(PDO::ATTR_AUTOCOMMIT, FALSE);

    $timers = array();

    try {
        $query = "SELECT * FROM test_images.pictures
                INNER JOIN test_images_pending.pictures
                USING ( digest )";

        $sth = $dbh->prepare($query);
        $sth->execute();

        while ($join_rows = $sth->fetch(PDO::FETCH_NUM, PDO::FETCH_ORI_NEXT)) {

            $digest =  $join_rows[0];
            $correct_pic_id = $join_rows[1];
            $wrong_pic_id = $join_rows[2];


            try {
                $dbh->beginTransaction(); 



                $query = "INSERT INTO test_images.stored_pictures 
                                (url, pid, num, updated_at, created_at, picture_id) 
                          SELECT 
                                url, pid, num, updated_at, created_at, :correct_pic_id FROM test_images_pending.stored_pictures WHERE picture_id = :wrong_pic_id;";

                $sth4 = $dbh->prepare($query);
                $sth4->bindParam(':correct_pic_id', $correct_pic_id);
                $sth4->bindParam(':wrong_pic_id', $wrong_pic_id);
                $sth4->execute();


                $dbh->commit(); 
            } catch (PDOException $e) {
                $dbh->rollback(); 
                print "<pre>"; print_r($e); print "</pre>"; exit;
            }
        }

    } catch (PDOException $e) {
        print "<pre>"; print_r($e); print "</pre>"; exit;
    }





    try {


        $query = "SELECT COUNT(id) FROM  `signatures` WHERE (`id` -  `picture_id` !=0)  ";
        $sth = $dbh->prepare($query);
        $sth->execute();

        $count = $sth->fetchColumn();
        if($count > 0) {
            die("we got a sig that aint matching its pic_id, we cant assume sig_id = pic_id. Back to drawing board");
        }
        $sth = null;


        $query = "  SELECT  digest, id
                    FROM    test_images_pending.pictures
                    WHERE   digest NOT IN
                        (
                        SELECT  digest
                        FROM    test_images.pictures
                        )"; 
        $sth = $dbh->prepare($query);
        $sth->execute();

        while ($not_in_rows = $sth->fetch(PDO::FETCH_NUM, PDO::FETCH_ORI_NEXT)) {

            $digest =  $not_in_rows[0];
            $wrong_pic_id = $not_in_rows[1];


            try {
                $dbh->beginTransaction(); 

                $query = "INSERT INTO test_images.pictures
                            (id, digest) 
                          VALUES 
                            (default, :digest);";

                $sth2 = $dbh->prepare($query);
                $sth2->bindParam(':digest', $digest);
                $sth2->execute();

                $new_pic_id = $dbh->lastInsertId();



                $query = "INSERT INTO test_images.stored_pictures 
                                (url, pid, num, updated_at, created_at, picture_id) 
                          SELECT 
                                url, pid, num, updated_at, created_at, :new_pic_id FROM test_images_pending.stored_pictures WHERE picture_id = :wrong_pic_id;";

                $sth3 = $dbh->prepare($query);
                $sth3->bindParam(':new_pic_id', $new_pic_id);
                $sth3->bindParam(':wrong_pic_id', $wrong_pic_id);
                $sth3->execute();



                $query = "INSERT INTO test_images.signatures 
                                (compressed_signature, picture_id) 
                          SELECT 
                                compressed_signature, :new_pic_id FROM test_images_pending.signatures WHERE picture_id = :wrong_pic_id;";

                $sth4 = $dbh->prepare($query);
                $sth4->bindParam(':new_pic_id', $new_pic_id);
                $sth4->bindParam(':wrong_pic_id', $wrong_pic_id);
                $sth4->execute();
                $new_sig_id = $dbh->lastInsertId();


                $query = "INSERT INTO test_images.words 
                            (pos_and_word, signature_id)
                          SELECT 
                            pos_and_word, :new_sig_id FROM test_images_pending.words WHERE signature_id = :old_sig_id

                            ";

                $sth9 = $dbh->prepare($query);
                $sth9->bindParam(':old_sig_id', $wrong_pic_id);
                $sth9->bindParam(':new_sig_id', $new_sig_id);
                $sth9->execute();



                $dbh->commit(); 
            } catch (PDOException $e) {
                $dbh->rollback(); 
                print "<pre>"; print_r($e); print "</pre>"; exit;
            }
        }
    } catch (PDOException $e) {
        print "<pre>"; print_r($e); print "</pre>"; exit;
    }

function buffer_flush(){

    echo str_pad('', 512);
    echo '<!-- -->';

    if(ob_get_length()){

        @ob_flush();
        @flush();
        @ob_end_flush();

    }

    @ob_start();

}
 ?> 
</body>
</html>

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM