// Large-file sort: split the input into chunks, sort every chunk in memory,
// then merge the sorted chunks pairwise (external merge sort).
//
// Every non-blank line is ordered by its integer value (intval); blank lines
// are dropped. NOTE(review): the data is assumed to be one integer per line,
// as implied by the original intval()-based merge -- confirm against real input.

/**
 * Opens a file and fails loudly instead of handing `false` to later calls.
 *
 * @param string $path File to open.
 * @param string $mode fopen() mode.
 * @return resource Open stream handle.
 * @throws RuntimeException When the file cannot be opened.
 */
function openDataFile($path, $mode)
{
    $handle = fopen($path, $mode);
    if ($handle === false) {
        throw new RuntimeException("Unable to open file: " . $path);
    }

    return $handle;
}

/**
 * Reads the next non-blank line from a stream.
 *
 * @param resource $handle Open stream handle.
 * @return string|null The trimmed line, or null once the stream is exhausted.
 */
function readDataLine($handle)
{
    // Test the fgets() result rather than feof(): feof() only turns true
    // after a read has already failed, which yields a phantom extra record.
    while (($raw = fgets($handle)) !== false) {
        $line = trim($raw);
        if ($line !== '') {
            return $line;
        }
    }

    return null;
}

/**
 * Orders two lines by their integer value.
 *
 * @param string $a First line.
 * @param string $b Second line.
 * @return int -1, 0 or 1.
 */
function compareAsIntegers($a, $b)
{
    $x = intval($a);
    $y = intval($b);
    if ($x === $y) {
        return 0;
    }

    // Explicit comparison instead of subtraction to avoid integer overflow.
    return $x < $y ? -1 : 1;
}

/**
 * Counts the lines of a file.
 *
 * A trailing newline does not add a line and an empty file has 0 lines.
 *
 * @param string $path File to inspect.
 * @return int Number of lines.
 * @throws RuntimeException When the file cannot be opened.
 */
function countsLines($path)
{
    $fd = openDataFile($path, "r");
    $total = 0;
    while (fgets($fd) !== false) {
        $total++;
    }
    fclose($fd);

    return $total;
}

$filePath = "./file.dat";

/**
 * Splits a file into chunk files of at most $rows lines each.
 *
 * Chunks are written to the current working directory as "1.dat", "2.dat", ...
 * The input is read in a single pass; an empty input produces no chunks.
 *
 * @param string $path Input file.
 * @param int    $rows Maximum number of lines per chunk (>= 1).
 * @return string[] Chunk file names in creation order.
 * @throws InvalidArgumentException When $rows is smaller than 1.
 * @throws RuntimeException When a file cannot be opened.
 */
function checkFiles($path, $rows = 5000)
{
    $rows = (int) $rows;
    if ($rows < 1) {
        throw new InvalidArgumentException("rows must be at least 1");
    }

    $fd = openDataFile($path, "r");
    $files = [];
    $sd = null;
    $written = 0;

    while (($line = fgets($fd)) !== false) {
        // Open the next chunk lazily so that no empty chunk is ever created.
        if ($sd === null) {
            $fileName = (count($files) + 1) . ".dat";
            $sd = openDataFile($fileName, "w");
            $files[] = $fileName;
            $written = 0;
        }

        fwrite($sd, $line);
        $written++;

        if ($written >= $rows) {
            fclose($sd);
            $sd = null;
        }
    }

    if ($sd !== null) {
        fclose($sd);
    }
    fclose($fd);

    return $files;
}

/**
 * Sorts every given chunk file in place, ascending by integer value.
 *
 * Lines are trimmed, blank lines are removed, and each remaining line is
 * written back terminated by PHP_EOL.
 *
 * @param string[] $files Chunk files to sort.
 * @return void
 * @throws RuntimeException When a chunk cannot be read or written.
 */
function cSort($files)
{
    foreach ($files as $file) {
        $content = file($file);
        if ($content === false) {
            throw new RuntimeException("Unable to read file: " . $file);
        }

        $content = array_filter(
            array_map(function ($value) {
                return trim($value);
            }, $content),
            function ($value) {
                return $value !== '';
            }
        );
        usort($content, 'compareAsIntegers');

        $payload = count($content) > 0 ? implode(PHP_EOL, $content) . PHP_EOL : '';
        if (file_put_contents($file, $payload) === false) {
            throw new RuntimeException("Unable to write file: " . $file);
        }
    }
}

/**
 * Merges two sorted files into one sorted file.
 *
 * @param string $pathA   First sorted input.
 * @param string $pathB   Second sorted input.
 * @param string $pathOut Destination file (created or truncated).
 * @return void
 * @throws RuntimeException When a file cannot be opened.
 */
function mergeSortedPair($pathA, $pathB, $pathOut)
{
    $fd1 = openDataFile($pathA, "r");
    $fd2 = openDataFile($pathB, "r");
    $fd3 = openDataFile($pathOut, "w");

    $line1 = readDataLine($fd1);
    $line2 = readDataLine($fd2);

    // Classic two-way merge: always emit the smaller head record.
    while ($line1 !== null && $line2 !== null) {
        if (compareAsIntegers($line1, $line2) <= 0) {
            fwrite($fd3, $line1 . PHP_EOL);
            $line1 = readDataLine($fd1);
        } else {
            fwrite($fd3, $line2 . PHP_EOL);
            $line2 = readDataLine($fd2);
        }
    }

    // At most one of the inputs still has records; copy them through.
    for (; $line1 !== null; $line1 = readDataLine($fd1)) {
        fwrite($fd3, $line1 . PHP_EOL);
    }
    for (; $line2 !== null; $line2 = readDataLine($fd2)) {
        fwrite($fd3, $line2 . PHP_EOL);
    }

    fclose($fd1);
    fclose($fd2);
    fclose($fd3);
}

/**
 * Merges already-sorted files into a single sorted file.
 *
 * Files are merged pairwise and each result is appended to the end of the
 * work queue, so merges stay balanced. Intermediate results get short names
 * ("merged-N.dat" next to the first input of the pair) that never overwrite
 * an existing file, and are deleted once consumed. The caller's input files
 * are left in place.
 *
 * @param string[] $files Sorted files to merge.
 * @return string|null Path of the fully merged file (the single input itself
 *                     when only one file is given), or null for an empty list.
 * @throws RuntimeException When a file cannot be opened.
 */
function mergeAllData($files)
{
    $queue = array_values($files);
    if (count($queue) === 0) {
        return null;
    }

    $temporary = []; // paths created by this call, used as a set
    $sequence = 0;

    while (count($queue) > 1) {
        $fname1 = array_shift($queue);
        $fname2 = array_shift($queue);

        // Short, collision-free name: concatenating all chunk ids would
        // exceed filesystem name limits after roughly a hundred chunks.
        do {
            $sequence++;
            $newFileName = dirname($fname1) . DIRECTORY_SEPARATOR . "merged-" . $sequence . ".dat";
        } while (file_exists($newFileName));

        mergeSortedPair($fname1, $fname2, $newFileName);

        foreach ([$fname1, $fname2] as $consumed) {
            if (isset($temporary[$consumed])) {
                unlink($consumed);
                unset($temporary[$consumed]);
            }
        }

        $temporary[$newFileName] = true;
        $queue[] = $newFileName;
    }

    return $queue[0];
}

// Driver: split, sort each chunk, then merge.
if (is_file($filePath) && is_readable($filePath)) {
    // Split
    $files = checkFiles($filePath);
    // Sort each chunk
    cSort($files);
    // Merge
    $sortedFile = mergeAllData($files);
} else {
    error_log("Input file not found or unreadable: " . $filePath);
}
原文地址:https://www.cnblogs.com/zh718594493/p/12089477.html
时间: 2024-10-13 16:23:47