簡體   English   中英

PHP腳本內存泄漏

[英]PHP Script Memory Leak

我知道這並不是最小的代碼,我已經盡力減少了。 該腳本只會消耗越來越多的內存,直到最終用完為止。 我在可能的地方使用過unset(),但似乎沒有任何效果。 MultiGet函數中似乎總是出錯,但是我不確定這是否是泄漏所在。 任何投入將不勝感激。

public function Test()
{
    $base = dirname(__FILE__) .'/';
    $prod_file = $base.'products.dbf';

    $this->dbf->load($prod_file);
    $num_rec=$ci->dbf->dbf_num_rec;

    $buffer = Array();
    for($i=0;$i<$num_rec;$i++):
        $row = $ci->dbf->getRowAssoc($i);

        $info = Array('part_number' => $row['PART_NUM'],
                      'td_group_id' => $row['GRP'],
                      'name' => 'DESCR');

        $this->db->where('td_group_id',$info['td_group_id']);
        $result = $this->db->get('tbl_categories')->row_array();
        if(isset($result['id'])):
            $info['category_id'] = $result['id'];
            $buffer[]  = $info;
        endif;

        if(count($buffer) == 100 || $i == $num_rec -1):
            $url_buffer = Array();
            foreach($buffer as $row):
                $url_buffer[] = $this->_product_url($row['part_number']);
            endforeach;

            $html_returns = $this->MultiCrawl($url_buffer);
            foreach($html_returns as $url_index=>$html):
                $more_info = $this->_extract_more_info($html);
                if($more_info):
                    $more_info['category_id'] = $buffer[$url_index]['category_id'];
                    $more_info['td_part_number'] = $buffer[$url_index]['part_number'];
                    $this->_parse_product($more_info);
                endif;
            endforeach;
            $buffer = Array();
        endif;

    endfor;



}


function MultiGet($all_urls)
{

    $useragent = $this->_useragent;
    $cookie_file = $this->_cookie_file;

    $url_index = $this->UrlIndex($all_urls);

    $return_buffer = Array();

    $mh = curl_multi_init();

    $ch = Array();
    $max_connections = 15;
    $index = 0;
    $open_connections = 0;
    $execReturnValue = true;
    $running = true;
    $max_index = count($all_urls)-1;
    $url_count = count($all_urls);
    $buffer_count = 0;

    while ($buffer_count < $url_count){

        if($open_connections < $max_connections && $index <= $max_index):
            for($i=$open_connections;$i<$max_connections && $index <= $max_index;$i++):
                $url = $all_urls[$index];
                $ch[$index] = curl_init($url);
                curl_setopt($ch[$index],CURLOPT_FOLLOWLOCATION, true);
                curl_setopt($ch[$index],CURLOPT_RETURNTRANSFER, true);
                curl_setopt($ch[$index],CURLOPT_COOKIESESSION, false);
                curl_setopt($ch[$index],CURLOPT_SSL_VERIFYHOST , false);
                curl_setopt($ch[$index],CURLOPT_SSL_VERIFYPEER , false);
                curl_setopt($ch[$index],CURLOPT_COOKIEJAR, $cookie_file);
                curl_setopt($ch[$index],CURLOPT_COOKIEFILE, $cookie_file);
                curl_setopt($ch[$index],CURLOPT_USERAGENT,$useragent);
                curl_multi_add_handle($mh, $ch[$index]);
                $open_connections++;
                $index++;
                $execReturnValue = curl_multi_exec($mh,$running);
                usleep(200);
            endfor;
        endif;

        $execReturnValue = curl_multi_exec($mh,$running);
        $ready=curl_multi_select($mh);


        while($info=curl_multi_info_read($mh)){
            $status=curl_getinfo($info['handle'],CURLINFO_HTTP_CODE);
            if($status==200){
                $successUrl=curl_getinfo($info['handle'],CURLINFO_EFFECTIVE_URL);
                $curl_index = $url_index[$successUrl];
                $return_buffer[$curl_index] = curl_multi_getcontent($ch[$curl_index]);
                $buffer_count = count($return_buffer);
                curl_multi_remove_handle($mh, $ch[$curl_index]);
                curl_close($ch[$curl_index]);
                unset($ch[$curl_index]);
                $open_connections--;
            }else{

                echo "ERROR: $status\n";
            }
        }
    } 

    curl_multi_close($mh);
    unset($mh);

    return $return_buffer;
}



private function _extract_more_info($html)
{

    $buffer = array();


    $query = "//img[@id='ctl00_cphMain_cntrlProductProfile_imgprodimage']";
    $result = $this->_xquery($html,$query);
    $node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
    if(!$node) return null;
    $buffer['td_img_url'] = $node?trim($node->getAttribute('src')):null;
    unset($result);


    $query = "//span[@class='priceLarge']";
    $result = $this->_xquery($html,$query);
    $node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
    if(!$node) return null;
    $buffer['price'] = $node?trim($node->nodeValue):null;
    if($buffer['price'] == 'Req. Auth.') return null;
    unset($result);


    $query = "//span[@id='ctl00_cphMain_cntrlProductProfile_newLtFinalPrice']";
    $result = $this->_xquery($html,$query);
    $node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
    if(!$node) return null;
    $buffer['msrp'] = $node?trim($node->nodeValue):null;
    unset($result);


    $query = "//span[@id='ctl00_cphMain_cntrlProductProfile_newLTMRF']";
    $result = $this->_xquery($html,$query);
    $node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
    if(!$node) return null;
    $buffer['manf_part_number'] = $node?trim($node->nodeValue):null;
    unset($result);


    $query = "//span[@id='ctl00_cphMain_cntrlProductProfile_newLblUPC']";
    $result = $this->_xquery($html,$query);
    $node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
    $buffer['upc_part_number'] = $node?trim($node->nodeValue):null;
    unset($result);


    $query = "//td[@class='black_text_WUL']";
    $result = $this->_xquery($html,$query);
    $node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
    if(!$node) return null;
    $buffer['manufacturer'] = $node?trim($node->nodeValue):null;
    unset($result);


    $query = "//td[@class='textt' and @colspan='3']";
    $result = $this->_xquery($html,$query);
    $node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
    if(!$node) return null;
    $buffer['short_description'] = $node?trim($node->nodeValue):null;
    unset($result);





    $query = "//div[@id='ctl00_cphMain_pnlMarketingDesc']//td[@class='textt']";
    $result = $this->_xquery($html,$query);
    $node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
    if(!$node) return null;
    $buffer['long_description'] = $node?trim($node->nodeValue):null;
    unset($result);

    $query = "//table[@id='ctl00_cphMain_cntrlMainSpecs_dgSpecs']";
    $result = $this->_xquery($html,$query);
    $table = $result instanceof DOMNode?$this->_to_dom_node($result):null;
    unset($result);

    if(!$table) return null;
    $table_array = Array();
    $rows = $table->getElementsByTagName('tr');
    foreach($rows as $tr):
        $temp = Array();
        $columns = $tr->getElementsByTagName('td');
        $caption = $columns->length > 0 && $columns->length <= 2 ? trim($columns->item(0)->nodeValue) : null;
        $value = $columns->length == 2 ? trim($columns->item(1)->nodeValue) : null;

        if ($caption) $table_array[$caption] = $value;
    endforeach;


    $buffer['main_specs']=$table_array;


    $query = "//table[@id='ctl00_cphMain_cntrlExtSpecs_tblData']";
    $result = $this->_xquery($html,$query);
    $table = $result instanceof DOMNode?$this->_to_dom_node($result):null;
    unset($result);
    $buffer['additional_specs'] = null;
    if(!$table) return $buffer;


    $table_array = Array();
    $rows = $table->getElementsByTagName('tr');
    foreach($rows as $tr):
        $temp = Array();
        $columns = $tr->getElementsByTagName('td');
        $caption = $columns->length > 0 && $columns->length <= 2 ? trim($columns->item(0)->nodeValue) : null;
        $value = $columns->length == 2 ? trim($columns->item(1)->nodeValue) : null;

        if ($caption) $table_array[$caption] = $value;
    endforeach;
    $buffer['additional_specs']=$table_array;;
    return $buffer;

}



private function _xquery($html,$query,$allnodes = false){
    $src = '';
    $dom = new DOMDocument();
    $node = null;
    if (@$dom->loadHTML($html)) {
        $xpath = new DOMXpath($dom);
        $nodeList = $xpath->query($query);
        if ($nodeList->length > 0) {
            $node = $allnodes==false?$nodeList->item(0):$nodeList;
        }
    }
    unset($xpath);
    unset($nodeList);
    unset($dom);
    return $node;
}

找到泄漏的策略?

  • 確保它一個泄漏(如果處理數據的1/100,是內存還是沒有釋放?1/1000?)
  • 考慮一下復雜性:如果foo為O(n), bar為O(n),而bar調用foo ,則結果可能變為O(n * n)。
  • 實驗:禁用程序的一部分,直到不再泄漏

乍一看,您正在抓取一系列url。 這些可能包含更多的URL,可以使用MultiCrawl方法進行爬MultiCrawl 您確定其中沒有循環嗎? (使用文件夾欺騙了我很多次:瀏覽“。”作為子文件夾會產生無限循環)

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM