PHPWord provides \PhpOffice\PhpWord\IOFactory::createReader(). The reader's 'load' method returns the document contents as an object:
// Ask the IOFactory for a reader for the 'Word2007' format.
$objReader = \PhpOffice\PhpWord\IOFactory::createReader('Word2007');
// load() reads the given file and returns the object kept in $phpWord.
$phpWord = $objReader->load("with_table_macros.docx");
However, this object is too complex if the intention is just to get the text contents for search. The complete code would look like this:
<?php
//https://phpword.readthedocs.io/en/latest/writersreaders.html
//https://stackoverflow.com/questions/10646445/read-word-document-in-php
//
// Loads a .docx template with PHPWord and echoes its text as simple HTML:
// tables are wrapped in "table contents start"/"table ended" paragraphs with
// a <br> after each row, and other section elements are printed in order.
require_once 'vendor/autoload.php';
use PhpOffice\PhpWord\TemplateProcessor;

$templatesDocsFolder = "docs/templates/";
$templatesDocName = "with_table_macros.docx";
$templatesDocPath = $templatesDocsFolder.$templatesDocName;
$phpWord = \PhpOffice\PhpWord\IOFactory::createReader('Word2007')->load($templatesDocPath);

// Joins the text of every Text child of a text run. Reading only the first
// child would drop everything after the first run of the paragraph.
$textRunContents = static function ($textRun) {
    $text = '';
    foreach ($textRun->getElements() as $child) {
        if ($child instanceof PhpOffice\PhpWord\Element\Text) {
            $text .= $child->getText();
        }
    }
    return $text;
};

foreach ($phpWord->getSections() as $section) {
    foreach ($section->getElements() as $element) {
        if ($element instanceof PhpOffice\PhpWord\Element\Table) {
            echo '<p>table contents start</p>';
            foreach ($element->getRows() as $row) {
                foreach ($row->getCells() as $cell) {
                    // Visit every element of the cell instead of indexing [0]:
                    // an empty cell then simply prints nothing (no
                    // undefined-offset warning) and later elements of the
                    // cell are no longer skipped.
                    foreach ($cell->getElements() as $cEl) {
                        if ($cEl instanceof PhpOffice\PhpWord\Element\Text) {
                            echo $cEl->getText() .'<br>';
                        } elseif ($cEl instanceof PhpOffice\PhpWord\Element\TextRun) {
                            echo $textRunContents($cEl);
                        }
                    }
                }
                echo '<br>';
            }
            echo '<p>table ended </p>';
        }//if ($element instanceof PhpOffice\PhpWord\Element\Table)

        if ($element instanceof PhpOffice\PhpWord\Element\Text) {
            echo $element->getText() .'<br>';
        } elseif ($element instanceof PhpOffice\PhpWord\Element\TextRun) {
            echo $textRunContents($element);
        } elseif (method_exists($element, 'getText')) {
            // Any other element exposing getText(). The result is not assumed
            // to be a string: a TextRun is flattened and an array is joined,
            // so the concatenation below never receives an object or array.
            $text = $element->getText();
            if ($text instanceof PhpOffice\PhpWord\Element\TextRun) {
                $text = $textRunContents($text);
            } elseif (is_array($text)) {
                $text = implode('', $text);
            }
            echo($text . "<br>");
        }
    }
}
?>
A simpler approach is to treat the .docx file as a ZIP archive (for example with PHP's 'zip_read') and pull the text out of its document XML directly:
<?php
//https://phpword.readthedocs.io/en/latest/writersreaders.html
//https://stackoverflow.com/questions/10646445/read-word-document-in-php
require_once 'vendor/autoload.php';
use PhpOffice\PhpWord\TemplateProcessor;
/**
 * Extracts the plain text of a .docx file from its "word/document.xml" part.
 *
 * The archive is read with ZipArchive rather than the procedural zip_* API:
 * that API is deprecated as of PHP 8.0, and the previous loop skipped
 * zip_entry_close() for every entry it opened but did not read.
 *
 * Boundaries between table cells become a single space, paragraph ends become
 * "\r\n", and all remaining tags are removed with strip_tags(). XML entities
 * in the text are left as they appear in the document XML.
 *
 * @param string $filename Path to the .docx file.
 * @return string|false The extracted text (empty when the archive has no
 *                      "word/document.xml" entry), or false when the path is
 *                      empty, the file does not exist, or it cannot be opened
 *                      as a ZIP archive.
 */
function read_docx($filename){
    if (!$filename || !file_exists($filename)) {
        return false;
    }

    $zip = new ZipArchive();
    // open() returns true on success and an integer error code on failure,
    // so it must be compared strictly against true.
    if ($zip->open($filename) !== true) {
        return false;
    }
    $content = $zip->getFromName("word/document.xml");
    $zip->close();

    // A missing entry yields false; keep returning an empty string for it.
    if ($content === false) {
        $content = '';
    }

    // Order matters: cell boundaries must be rewritten before the generic
    // paragraph-end replacement consumes the "</w:r></w:p>" prefix.
    $content = str_replace('</w:r></w:p></w:tc><w:tc>', " ", $content);
    $content = str_replace('</w:r></w:p>', "\r\n", $content);

    return strip_tags($content);
}
// Read the document text and list the distinct __PLACEHOLDER__ tokens in it.
$doc_contents = read_docx("WPC-A4.docx");
if ($doc_contents === false) {
    // read_docx() returns false when the file is missing or cannot be opened;
    // report that instead of printing an empty placeholder list.
    echo "Could not read WPC-A4.docx";
} else {
    echo $doc_contents;
    // Ungreedy match of "__", a run of non-whitespace characters, then "__";
    // group 1 holds the whole token including the underscores.
    preg_match_all("/(__([^\s]+)__)/sU",$doc_contents,$matches);
    // array_unique() keeps the original keys, so reindex before printing.
    echo "<pre>".print_r(array_values(array_unique($matches[1])),true)."</pre>";
}
?>
|
|