Files
ETS/ETS_Data/SearchAndReplace.cs

415 lines
19 KiB
C#

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml;
using System.Xml.Linq;
using DocumentFormat.OpenXml.Packaging;
namespace ETS_Data
{
public class SearchAndReplacer
{
public static XmlDocument GetXmlDocument(OpenXmlPart part)
{
XmlDocument xmlDoc = new XmlDocument();
using (Stream partStream = part.GetStream())
using (XmlReader partXmlReader = XmlReader.Create(partStream))
xmlDoc.Load(partXmlReader);
return xmlDoc;
}
public static void PutXmlDocument(OpenXmlPart part, XmlDocument xmlDoc)
{
using (Stream partStream = part.GetStream(FileMode.Create, FileAccess.Write))
using (XmlWriter partXmlWriter = XmlWriter.Create(partStream))
xmlDoc.Save(partXmlWriter);
}
static void SearchAndReplaceInParagraph(XmlElement paragraph, string search,
string replace, bool matchCase)
{
XmlDocument xmlDoc = paragraph.OwnerDocument;
string wordNamespace =
"http://schemas.openxmlformats.org/wordprocessingml/2006/main";
XmlNamespaceManager nsmgr =
new XmlNamespaceManager(xmlDoc.NameTable);
nsmgr.AddNamespace("w", wordNamespace);
XmlNodeList paragraphText = paragraph.SelectNodes("descendant::w:t", nsmgr);
StringBuilder sb = new StringBuilder();
foreach (XmlNode text in paragraphText)
sb.Append(((XmlElement)text).InnerText);
if (sb.ToString().Contains(search) ||
(!matchCase && sb.ToString().ToUpper().Contains(search.ToUpper())))
{
XmlNodeList runs = paragraph.SelectNodes("child::w:r", nsmgr);
foreach (XmlElement run in runs)
{
XmlNodeList childElements = run.SelectNodes("child::*", nsmgr);
if (childElements.Count > 0)
{
XmlElement last = (XmlElement)childElements[childElements.Count - 1];
for (int c = childElements.Count - 1; c >= 0; --c)
{
if (childElements[c].Name == "w:rPr")
continue;
if (childElements[c].Name == "w:t")
{
string textElementString = childElements[c].InnerText;
for (int i = textElementString.Length - 1; i >= 0; --i)
{
XmlElement newRun =
xmlDoc.CreateElement("w:r", wordNamespace);
XmlElement runProps =
(XmlElement)run.SelectSingleNode("child::w:rPr", nsmgr);
if (runProps != null)
{
XmlElement newRunProps =
(XmlElement)runProps.CloneNode(true);
newRun.AppendChild(newRunProps);
}
XmlElement newTextElement =
xmlDoc.CreateElement("w:t", wordNamespace);
XmlText newText =
xmlDoc.CreateTextNode(textElementString[i].ToString());
newTextElement.AppendChild(newText);
if (textElementString[i] == ' ')
{
XmlAttribute xmlSpace = xmlDoc.CreateAttribute(
"xml", "space",
"http://www.w3.org/XML/1998/namespace");
xmlSpace.Value = "preserve";
newTextElement.Attributes.Append(xmlSpace);
}
newRun.AppendChild(newTextElement);
paragraph.InsertAfter(newRun, run);
}
}
else
{
XmlElement newRun = xmlDoc.CreateElement("w:r", wordNamespace);
XmlElement runProps =
(XmlElement)run.SelectSingleNode("child::w:rPr", nsmgr);
if (runProps != null)
{
XmlElement newRunProps =
(XmlElement)runProps.CloneNode(true);
newRun.AppendChild(newRunProps);
}
XmlElement newChildElement =
(XmlElement)childElements[c].CloneNode(true);
newRun.AppendChild(newChildElement);
paragraph.InsertAfter(newRun, run);
}
}
paragraph.RemoveChild(run);
}
}
while (true)
{
bool cont = false;
runs = paragraph.SelectNodes("child::w:r", nsmgr);
for (int i = 0; i <= runs.Count - search.Length; ++i)
{
bool match = true;
for (int c = 0; c < search.Length; ++c)
{
XmlElement textElement =
(XmlElement)runs[i + c].SelectSingleNode("child::w:t", nsmgr);
if (textElement == null)
{
match = false;
break;
}
if (textElement.InnerText == search[c].ToString())
continue;
if (!matchCase &&
textElement.InnerText.ToUpper() == search[c].ToString().ToUpper())
continue;
match = false;
break;
}
if (match)
{
XmlElement runProps =
(XmlElement)runs[i].SelectSingleNode("descendant::w:rPr", nsmgr);
XmlElement newRun = xmlDoc.CreateElement("w:r", wordNamespace);
if (runProps != null)
{
XmlElement newRunProps = (XmlElement)runProps.CloneNode(true);
newRun.AppendChild(newRunProps);
}
XmlElement newTextElement =
xmlDoc.CreateElement("w:t", wordNamespace);
XmlText newText = xmlDoc.CreateTextNode(replace);
newTextElement.AppendChild(newText);
if (replace[0] == ' ' || replace[replace.Length - 1] == ' ')
{
XmlAttribute xmlSpace = xmlDoc.CreateAttribute("xml", "space",
"http://www.w3.org/XML/1998/namespace");
xmlSpace.Value = "preserve";
newTextElement.Attributes.Append(xmlSpace);
}
newRun.AppendChild(newTextElement);
paragraph.InsertAfter(newRun, (XmlNode)runs[i]);
for (int c = 0; c < search.Length; ++c)
paragraph.RemoveChild(runs[i + c]);
cont = true;
break;
}
}
if (!cont)
break;
}
// Consolidate adjacent runs that have only text elements, and have the
// same run properties. This isn't necessary to create a valid document,
// however, having the split runs is a bit messy.
XmlNodeList children = paragraph.SelectNodes("child::*", nsmgr);
List<int> matchId = new List<int>();
int id = 0;
for (int c = 0; c < children.Count; ++c)
{
if (c == 0)
{
matchId.Add(id);
continue;
}
if (children[c].Name == "w:r" &&
children[c - 1].Name == "w:r" &&
children[c].SelectSingleNode("w:t", nsmgr) != null &&
children[c - 1].SelectSingleNode("w:t", nsmgr) != null)
{
XmlElement runProps =
(XmlElement)children[c].SelectSingleNode("w:rPr", nsmgr);
XmlElement lastRunProps =
(XmlElement)children[c - 1].SelectSingleNode("w:rPr", nsmgr);
if ((runProps == null && lastRunProps != null) ||
(runProps != null && lastRunProps == null))
{
matchId.Add(++id);
continue;
}
if (runProps != null && runProps.InnerXml != lastRunProps.InnerXml)
{
matchId.Add(++id);
continue;
}
matchId.Add(id);
continue;
}
matchId.Add(++id);
}
for (int i = 0; i <= id; ++i)
{
var x1 = matchId.IndexOf(i);
var x2 = matchId.LastIndexOf(i);
if (x1 == x2)
continue;
StringBuilder sb2 = new StringBuilder();
for (int z = x1; z <= x2; ++z)
sb2.Append(((XmlElement)children[z]
.SelectSingleNode("w:t", nsmgr)).InnerText);
XmlElement newRun = xmlDoc.CreateElement("w:r", wordNamespace);
XmlElement runProps =
(XmlElement)children[x1].SelectSingleNode("child::w:rPr", nsmgr);
if (runProps != null)
{
XmlElement newRunProps = (XmlElement)runProps.CloneNode(true);
newRun.AppendChild(newRunProps);
}
XmlElement newTextElement = xmlDoc.CreateElement("w:t", wordNamespace);
XmlText newText = xmlDoc.CreateTextNode(sb2.ToString());
newTextElement.AppendChild(newText);
if (sb2[0] == ' ' || sb2[sb2.Length - 1] == ' ')
{
XmlAttribute xmlSpace = xmlDoc.CreateAttribute(
"xml", "space", "http://www.w3.org/XML/1998/namespace");
xmlSpace.Value = "preserve";
newTextElement.Attributes.Append(xmlSpace);
}
newRun.AppendChild(newTextElement);
paragraph.InsertAfter(newRun, children[x2]);
for (int z = x1; z <= x2; ++z)
paragraph.RemoveChild(children[z]);
}
var txbxParagraphs = paragraph.SelectNodes("descendant::w:p", nsmgr);
foreach (XmlElement p in txbxParagraphs)
SearchAndReplaceInParagraph((XmlElement)p, search, replace, matchCase);
}
}
public static bool PartHasTrackedRevisions(OpenXmlPart part)
{
XmlDocument doc = GetXmlDocument(part);
string wordNamespace =
"http://schemas.openxmlformats.org/wordprocessingml/2006/main";
XmlNamespaceManager nsmgr =
new XmlNamespaceManager(doc.NameTable);
nsmgr.AddNamespace("w", wordNamespace);
string xpathExpression =
"descendant::w:cellDel|" +
"descendant::w:cellIns|" +
"descendant::w:cellMerge|" +
"descendant::w:customXmlDelRangeEnd|" +
"descendant::w:customXmlDelRangeStart|" +
"descendant::w:customXmlInsRangeEnd|" +
"descendant::w:customXmlInsRangeStart|" +
"descendant::w:del|" +
"descendant::w:delInstrText|" +
"descendant::w:delText|" +
"descendant::w:ins|" +
"descendant::w:moveFrom|" +
"descendant::w:moveFromRangeEnd|" +
"descendant::w:moveFromRangeStart|" +
"descendant::w:moveTo|" +
"descendant::w:moveToRangeEnd|" +
"descendant::w:moveToRangeStart|" +
"descendant::w:moveTo|" +
"descendant::w:numberingChange|" +
"descendant::w:rPrChange|" +
"descendant::w:pPrChange|" +
"descendant::w:rPrChange|" +
"descendant::w:sectPrChange|" +
"descendant::w:tcPrChange|" +
"descendant::w:tblGridChange|" +
"descendant::w:tblPrChange|" +
"descendant::w:tblPrExChange|" +
"descendant::w:trPrChange";
XmlNodeList descendants = doc.SelectNodes(xpathExpression, nsmgr);
return descendants.Count > 0;
}
public static bool HasTrackedRevisions(WordprocessingDocument doc)
{
if (PartHasTrackedRevisions(doc.MainDocumentPart))
return true;
foreach (var part in doc.MainDocumentPart.HeaderParts)
if (PartHasTrackedRevisions(part))
return true;
foreach (var part in doc.MainDocumentPart.FooterParts)
if (PartHasTrackedRevisions(part))
return true;
if (doc.MainDocumentPart.EndnotesPart != null)
if (PartHasTrackedRevisions(doc.MainDocumentPart.EndnotesPart))
return true;
if (doc.MainDocumentPart.FootnotesPart != null)
if (PartHasTrackedRevisions(doc.MainDocumentPart.FootnotesPart))
return true;
return false;
}
private static void SearchAndReplaceInXmlDocument(XmlDocument xmlDocument, string search,
string replace, bool matchCase)
{
XmlNamespaceManager nsmgr = new XmlNamespaceManager(xmlDocument.NameTable);
nsmgr.AddNamespace("w",
"http://schemas.openxmlformats.org/wordprocessingml/2006/main");
var paragraphs = xmlDocument.SelectNodes("descendant::w:p", nsmgr);
foreach (var paragraph in paragraphs)
SearchAndReplaceInParagraph((XmlElement)paragraph, search, replace, matchCase);
}
public static void SearchAndReplace(WordprocessingDocument wordDoc, string search,
string replace, bool matchCase)
{
if (HasTrackedRevisions(wordDoc))
throw new SearchAndReplaceException(
"Search and replace will not work with documents " +
"that contain revision tracking.");
XmlDocument xmlDoc;
xmlDoc = GetXmlDocument(wordDoc.MainDocumentPart.DocumentSettingsPart);
XmlNamespaceManager nsmgr = new XmlNamespaceManager(xmlDoc.NameTable);
nsmgr.AddNamespace("w",
"http://schemas.openxmlformats.org/wordprocessingml/2006/main");
XmlNodeList trackedRevisions =
xmlDoc.SelectNodes("descendant::w:trackRevisions", nsmgr);
if (trackedRevisions.Count > 0)
throw new SearchAndReplaceException(
"Revision tracking is turned on for document.");
xmlDoc = GetXmlDocument(wordDoc.MainDocumentPart);
SearchAndReplaceInXmlDocument(xmlDoc, search, replace, matchCase);
PutXmlDocument(wordDoc.MainDocumentPart, xmlDoc);
foreach (var part in wordDoc.MainDocumentPart.HeaderParts)
{
xmlDoc = GetXmlDocument(part);
SearchAndReplaceInXmlDocument(xmlDoc, search, replace, matchCase);
PutXmlDocument(part, xmlDoc);
}
foreach (var part in wordDoc.MainDocumentPart.FooterParts)
{
xmlDoc = GetXmlDocument(part);
SearchAndReplaceInXmlDocument(xmlDoc, search, replace, matchCase);
PutXmlDocument(part, xmlDoc);
}
if (wordDoc.MainDocumentPart.EndnotesPart != null)
{
xmlDoc = GetXmlDocument(wordDoc.MainDocumentPart.EndnotesPart);
SearchAndReplaceInXmlDocument(xmlDoc, search, replace, matchCase);
PutXmlDocument(wordDoc.MainDocumentPart.EndnotesPart, xmlDoc);
}
if (wordDoc.MainDocumentPart.FootnotesPart != null)
{
xmlDoc = GetXmlDocument(wordDoc.MainDocumentPart.FootnotesPart);
SearchAndReplaceInXmlDocument(xmlDoc, search, replace, matchCase);
PutXmlDocument(wordDoc.MainDocumentPart.FootnotesPart, xmlDoc);
}
}
}
public class SearchAndReplaceException : Exception
{
public SearchAndReplaceException(string message) : base(message) { }
}
public static class Extensions
{
public static string ToStringAlignAttributes(this XContainer xContainer)
{
XmlWriterSettings settings = new XmlWriterSettings();
settings.Indent = true;
settings.OmitXmlDeclaration = true;
settings.NewLineOnAttributes = true;
StringBuilder sb = new StringBuilder();
using (XmlWriter xmlWriter = XmlWriter.Create(sb, settings))
xContainer.WriteTo(xmlWriter);
return sb.ToString();
}
public static XDocument GetXDocument(this XmlDocument document)
{
XDocument xDoc = new XDocument();
using (XmlWriter xmlWriter = xDoc.CreateWriter())
document.WriteTo(xmlWriter);
XmlDeclaration decl =
document.ChildNodes.OfType<XmlDeclaration>().FirstOrDefault();
if (decl != null)
xDoc.Declaration = new XDeclaration(decl.Version, decl.Encoding,
decl.Standalone);
return xDoc;
}
public static XElement GetXElement(this XmlNode node)
{
XDocument xDoc = new XDocument();
using (XmlWriter xmlWriter = xDoc.CreateWriter())
node.WriteTo(xmlWriter);
return xDoc.Root;
}
}
}