Aspose::Words::Loading::HtmlLoadOptions Class Reference

Detailed Description

Allows specifying additional options when loading an HTML document into a Document object.

#include <Aspose.Words.Cpp/Loading/HtmlLoadOptions.h>

+ Inheritance diagram for Aspose::Words::Loading::HtmlLoadOptions:

Public Member Functions

 HtmlLoadOptions ()
 Initializes a new instance of this class with default values. More...
 
 HtmlLoadOptions (LoadFormat loadFormat, String password, String baseUri)
 A shortcut to initialize a new instance of this class with properties set to the specified values. More...
 
 HtmlLoadOptions (String password)
 A shortcut to initialize a new instance of this class with the specified password to load an encrypted document. More...
 
bool get_IgnoreNoscriptElements () const
 Gets a value indicating whether to ignore <noscript> HTML elements. Default value is false. More...
 
HtmlControlType get_PreferredControlType () const
 Gets or sets preferred type of document nodes that will represent imported <input> and <select> elements. Default value is FormField. More...
 
bool get_SupportVml () const
 Gets or sets a value indicating whether to support VML images. More...
 
int32_t get_WebRequestTimeout () const
 The number of milliseconds to wait before the web request times out. The default value is 100000 milliseconds (100 seconds). More...
 
virtual const TypeInfo & GetType () const override
 
virtual bool Is (const TypeInfo &target) const override
 
void set_IgnoreNoscriptElements (bool value)
 Sets a value indicating whether to ignore <noscript> HTML elements. Default value is false. More...
 
void set_PreferredControlType (HtmlControlType value)
 Setter for get_PreferredControlType. More...
 
void set_SupportVml (bool value)
 Setter for get_SupportVml. More...
 
void set_WebRequestTimeout (int32_t value)
 Setter for get_WebRequestTimeout. More...
 
- Public Member Functions inherited from LoadOptions
 LoadOptions ()
 Initializes a new instance of this class with default values. More...
 
 LoadOptions (LoadFormat loadFormat, String password, String baseUri)
 A shortcut to initialize a new instance of this class with properties set to the specified values. More...
 
 LoadOptions (String password)
 A shortcut to initialize a new instance of this class with the specified password to load an encrypted document. More...
 
String get_BaseUri () const
 Gets or sets the string that will be used to resolve relative URIs found in the document into absolute URIs when required. Can be null or empty string. Default is null. More...
 
bool get_ConvertMetafilesToPng () const
 Gets or sets whether to convert metafile (Wmf or Emf) images to Png image format. More...
 
bool get_ConvertShapeToOfficeMath () const
 Gets or sets whether to convert shapes with EquationXML to Office Math objects. More...
 
SharedPtr< Encoding > get_Encoding () const
 Gets or sets the encoding that will be used to load an HTML, TXT, or CHM document if the encoding is not specified inside the document. Can be null. Default is null. More...
 
SharedPtr< FontSettings > get_FontSettings () const
 Allows specifying document font settings. More...
 
SharedPtr< LanguagePreferences > get_LanguagePreferences () const
 Gets language preferences that will be used when document is loading. More...
 
LoadFormat get_LoadFormat () const
 Specifies the format of the document to be loaded. Default is Auto. More...
 
MsWordVersion get_MswVersion () const
 Allows specifying that the document loading process should match a specific MS Word version. Default value is Word2007. More...
 
String get_Password () const
 Gets or sets the password for opening an encrypted document. Can be null or empty string. Default is null. More...
 
bool get_PreserveIncludePictureField () const
 Gets or sets whether to preserve the INCLUDEPICTURE field when reading Microsoft Word formats. The default value is false. More...
 
SharedPtr< IResourceLoadingCallback > get_ResourceLoadingCallback () const
 Allows controlling how external resources (images, style sheets) are loaded when a document is imported from HTML or MHTML. More...
 
String get_TempFolder () const
 Allows using temporary files when reading a document. By default, this property is null and no temporary files are used. More...
 
bool get_UpdateDirtyFields () const
 Specifies whether to update the fields with the dirty attribute. More...
 
SharedPtr< IWarningCallback > get_WarningCallback () const
 Called during a load operation, when an issue is detected that might result in data or formatting fidelity loss. More...
 
void set_BaseUri (String value)
 Setter for get_BaseUri. More...
 
void set_ConvertMetafilesToPng (bool value)
 Setter for get_ConvertMetafilesToPng. More...
 
void set_ConvertShapeToOfficeMath (bool value)
 Setter for get_ConvertShapeToOfficeMath. More...
 
void set_Encoding (SharedPtr< Encoding > value)
 Setter for get_Encoding. More...
 
void set_FontSettings (SharedPtr< FontSettings > value)
 Setter for get_FontSettings. More...
 
void set_LoadFormat (LoadFormat value)
 Setter for get_LoadFormat. More...
 
void set_MswVersion (MsWordVersion value)
 Setter for get_MswVersion. More...
 
void set_Password (String value)
 Setter for get_Password. More...
 
void set_PreserveIncludePictureField (bool value)
 Setter for get_PreserveIncludePictureField. More...
 
void set_ResourceLoadingCallback (SharedPtr< IResourceLoadingCallback > value)
 Setter for get_ResourceLoadingCallback. More...
 
void set_TempFolder (String value)
 Setter for get_TempFolder. More...
 
void set_UpdateDirtyFields (bool value)
 Setter for get_UpdateDirtyFields. More...
 
void set_WarningCallback (SharedPtr< IWarningCallback > value)
 Setter for get_WarningCallback. More...
 

Static Public Member Functions

static const TypeInfo & Type ()
 
- Static Public Member Functions inherited from LoadOptions
static const TypeInfo & Type ()
 

Constructor & Destructor Documentation

◆ HtmlLoadOptions() [1/3]

Aspose::Words::Loading::HtmlLoadOptions::HtmlLoadOptions ( )

Initializes a new instance of this class with default values.

Examples

Shows how to support conditional comments while loading an HTML document.

auto loadOptions = MakeObject<HtmlLoadOptions>();
// If the value is true, then we take VML code into account while parsing the loaded document.
loadOptions->set_SupportVml(supportVml);
// This document contains a JPEG image within "<!--[if gte vml 1]>" tags,
// and a different PNG image within "<![if !vml]>" tags.
// If we set the "SupportVml" flag to "true", then Aspose.Words will load the JPEG.
// If we set this flag to "false", then Aspose.Words will only load the PNG.
auto doc = MakeObject<Document>(MyDir + u"VML conditional.htm", loadOptions);
if (supportVml)
{
ASSERT_EQ(ImageType::Jpeg, (System::DynamicCast<Shape>(doc->GetChild(NodeType::Shape, 0, true)))->get_ImageData()->get_ImageType());
}
else
{
ASSERT_EQ(ImageType::Png, (System::DynamicCast<Shape>(doc->GetChild(NodeType::Shape, 0, true)))->get_ImageData()->get_ImageType());
}

◆ HtmlLoadOptions() [2/3]

Aspose::Words::Loading::HtmlLoadOptions::HtmlLoadOptions ( System::String  password)

A shortcut to initialize a new instance of this class with the specified password to load an encrypted document.

Parameters
password: The password to open an encrypted document. Can be null or empty string.
Examples

Shows how to encrypt an Html document, and then open it using a password.

// Create and sign an encrypted HTML document from an encrypted .docx.
SharedPtr<CertificateHolder> certificateHolder = CertificateHolder::Create(MyDir + u"morzal.pfx", u"aw");
auto signOptions = MakeObject<SignOptions>();
signOptions->set_Comments(u"Comment");
signOptions->set_SignTime(System::DateTime::get_Now());
signOptions->set_DecryptionPassword(u"docPassword");
String inputFileName = MyDir + u"Encrypted.docx";
String outputFileName = ArtifactsDir + u"HtmlLoadOptions.EncryptedHtml.html";
DigitalSignatureUtil::Sign(inputFileName, outputFileName, certificateHolder, signOptions);
// To load and read this document, we will need to pass its decryption
// password using a HtmlLoadOptions object.
auto loadOptions = MakeObject<HtmlLoadOptions>(u"docPassword");
ASSERT_EQ(signOptions->get_DecryptionPassword(), loadOptions->get_Password());
auto doc = MakeObject<Document>(outputFileName, loadOptions);
ASSERT_EQ(u"Test encrypted document.", doc->GetText().Trim());

◆ HtmlLoadOptions() [3/3]

Aspose::Words::Loading::HtmlLoadOptions::HtmlLoadOptions ( Aspose::Words::LoadFormat  loadFormat,
System::String  password,
System::String  baseUri 
)

A shortcut to initialize a new instance of this class with properties set to the specified values.

Parameters
loadFormat: The format of the document to be loaded.
password: The password to open an encrypted document. Can be null or empty string.
baseUri: The string that will be used to resolve relative URIs to absolute. Can be null or empty string.
Examples

Shows how to specify a base URI when opening an html document.

// Suppose we want to load an .html document that contains an image linked by a relative URI
// while the image is in a different location. In that case, we will need to resolve the relative URI into an absolute one.
// We can provide a base URI using an HtmlLoadOptions object.
auto loadOptions = MakeObject<HtmlLoadOptions>(LoadFormat::Html, u"", ImageDir);
ASSERT_EQ(LoadFormat::Html, loadOptions->get_LoadFormat());
auto doc = MakeObject<Document>(MyDir + u"Missing image.html", loadOptions);
// While the image was broken in the input .html, our custom base URI helped us repair the link.
auto imageShape = System::DynamicCast<Shape>(doc->GetChildNodes(NodeType::Shape, true)->idx_get(0));
ASSERT_TRUE(imageShape->get_IsImage());
// This output document will display the image that was missing.
doc->Save(ArtifactsDir + u"HtmlLoadOptions.BaseUri.docx");

Member Function Documentation

◆ get_IgnoreNoscriptElements()

bool Aspose::Words::Loading::HtmlLoadOptions::get_IgnoreNoscriptElements ( ) const

Gets a value indicating whether to ignore <noscript> HTML elements. Default value is false.

◆ get_PreferredControlType()

Aspose::Words::Loading::HtmlControlType Aspose::Words::Loading::HtmlLoadOptions::get_PreferredControlType ( ) const

Gets or sets preferred type of document nodes that will represent imported <input> and <select> elements. Default value is FormField.

Examples

Shows how to set preferred type of document nodes that will represent imported <input> and <select> elements.

const String html = u"\r\n <html>\r\n <select name='ComboBox' size='1'>\r\n <option "
u"value='val1'>item1</option>\r\n <option value='val2'></option> \r\n "
u" </select>\r\n </html>\r\n ";
auto htmlLoadOptions = MakeObject<HtmlLoadOptions>();
htmlLoadOptions->set_PreferredControlType(HtmlControlType::StructuredDocumentTag);
auto doc = MakeObject<Document>(MakeObject<System::IO::MemoryStream>(System::Text::Encoding::get_UTF8()->GetBytes(html)), htmlLoadOptions);
SharedPtr<NodeCollection> nodes = doc->GetChildNodes(NodeType::StructuredDocumentTag, true);
auto tag = System::DynamicCast<StructuredDocumentTag>(nodes->idx_get(0));

◆ get_SupportVml()

bool Aspose::Words::Loading::HtmlLoadOptions::get_SupportVml ( ) const

Gets or sets a value indicating whether to support VML images.

Examples

Shows how to support conditional comments while loading an HTML document.

auto loadOptions = MakeObject<HtmlLoadOptions>();
// If the value is true, then we take VML code into account while parsing the loaded document.
loadOptions->set_SupportVml(supportVml);
// This document contains a JPEG image within "<!--[if gte vml 1]>" tags,
// and a different PNG image within "<![if !vml]>" tags.
// If we set the "SupportVml" flag to "true", then Aspose.Words will load the JPEG.
// If we set this flag to "false", then Aspose.Words will only load the PNG.
auto doc = MakeObject<Document>(MyDir + u"VML conditional.htm", loadOptions);
if (supportVml)
{
ASSERT_EQ(ImageType::Jpeg, (System::DynamicCast<Shape>(doc->GetChild(NodeType::Shape, 0, true)))->get_ImageData()->get_ImageType());
}
else
{
ASSERT_EQ(ImageType::Png, (System::DynamicCast<Shape>(doc->GetChild(NodeType::Shape, 0, true)))->get_ImageData()->get_ImageType());
}

◆ get_WebRequestTimeout()

int32_t Aspose::Words::Loading::HtmlLoadOptions::get_WebRequestTimeout ( ) const

The number of milliseconds to wait before the web request times out. The default value is 100000 milliseconds (100 seconds).

Examples

Shows how to set a time limit for web requests when loading a document with external resources linked by URLs.

void WebRequestTimeout()
{
// Create a new HtmlLoadOptions object and verify its timeout threshold for a web request.
auto options = MakeObject<HtmlLoadOptions>();
// When loading an Html document with resources externally linked by a web address URL,
// Aspose.Words will abort web requests that fail to fetch the resources within this time limit, in milliseconds.
ASSERT_EQ(100000, options->get_WebRequestTimeout());
// Set a WarningCallback that will record all warnings that occur during loading.
auto warningCallback = MakeObject<ExHtmlLoadOptions::ListDocumentWarnings>();
options->set_WarningCallback(warningCallback);
// Load such a document and verify that a shape with image data has been created.
// This linked image will require a web request to load, which will have to complete within our time limit.
String html = String(u"<html>\n <img src=\"") + AsposeLogoUrl + u"\" alt=\"Aspose logo\" style=\"width:400px;height:400px;\">\n</html>";
auto doc = MakeObject<Document>(MakeObject<System::IO::MemoryStream>(System::Text::Encoding::get_UTF8()->GetBytes(html)), options);
auto imageShape = System::DynamicCast<Shape>(doc->GetChild(NodeType::Shape, 0, true));
ASSERT_EQ(7498, imageShape->get_ImageData()->get_ImageBytes()->get_Length());
ASSERT_EQ(0, warningCallback->Warnings()->get_Count());
// Set an unreasonable timeout limit and try load the document again.
options->set_WebRequestTimeout(0);
doc = MakeObject<Document>(MakeObject<System::IO::MemoryStream>(System::Text::Encoding::get_UTF8()->GetBytes(html)), options);
// A web request that fails to obtain an image within the time limit will still produce an image.
// However, the image will be the red 'x' that commonly signifies missing images.
imageShape = System::DynamicCast<Shape>(doc->GetChild(NodeType::Shape, 0, true));
ASSERT_EQ(924, imageShape->get_ImageData()->get_ImageBytes()->get_Length());
// We can also configure a custom callback to pick up any warnings from timed out web requests.
ASSERT_EQ(WarningSource::Html, warningCallback->Warnings()->idx_get(0)->get_Source());
ASSERT_EQ(WarningType::DataLoss, warningCallback->Warnings()->idx_get(0)->get_WarningType());
ASSERT_EQ(String::Format(u"Couldn't load a resource from \'{0}\'.", AsposeLogoUrl), warningCallback->Warnings()->idx_get(0)->get_Description());
ASSERT_EQ(WarningSource::Html, warningCallback->Warnings()->idx_get(1)->get_Source());
ASSERT_EQ(WarningType::DataLoss, warningCallback->Warnings()->idx_get(1)->get_WarningType());
ASSERT_EQ(u"Image has been replaced with a placeholder.", warningCallback->Warnings()->idx_get(1)->get_Description());
doc->Save(ArtifactsDir + u"HtmlLoadOptions.WebRequestTimeout.docx");
}
class ListDocumentWarnings : public IWarningCallback
{
public:
void Warning(SharedPtr<WarningInfo> info) override
{
mWarnings->Add(info);
}
SharedPtr<System::Collections::Generic::List<SharedPtr<WarningInfo>>> Warnings()
{
return mWarnings;
}
ListDocumentWarnings() : mWarnings(MakeObject<System::Collections::Generic::List<SharedPtr<WarningInfo>>>())
{
}
private:
SharedPtr<System::Collections::Generic::List<SharedPtr<WarningInfo>>> mWarnings;
};

◆ GetType()

virtual const System::TypeInfo& Aspose::Words::Loading::HtmlLoadOptions::GetType ( ) const
override virtual

◆ Is()

virtual bool Aspose::Words::Loading::HtmlLoadOptions::Is ( const System::TypeInfo & target) const
override virtual

◆ set_IgnoreNoscriptElements()

void Aspose::Words::Loading::HtmlLoadOptions::set_IgnoreNoscriptElements ( bool  value)

Sets a value indicating whether to ignore <noscript> HTML elements. Default value is false.

◆ set_PreferredControlType()

void Aspose::Words::Loading::HtmlLoadOptions::set_PreferredControlType ( Aspose::Words::Loading::HtmlControlType  value)

◆ set_SupportVml()

void Aspose::Words::Loading::HtmlLoadOptions::set_SupportVml ( bool  value)

◆ set_WebRequestTimeout()

void Aspose::Words::Loading::HtmlLoadOptions::set_WebRequestTimeout ( int32_t  value)

◆ Type()

static const System::TypeInfo& Aspose::Words::Loading::HtmlLoadOptions::Type ( )
static