Custom Sitemap.xml generator in Optimizely CMS 12
This is a step-by-step guide to generating a sitemap.xml file for every site in Optimizely CMS 12. The sitemaps are written to wwwroot/sitemaps, one file per site, named after the configured site name.
Step 1: Add a new field in Home Page to store the name of the sitemap file
Step 2: Create an interface for Sitemap generator service
Step 3: Implement the interface with sitemap generation logic
Step 4: Register the Service in startup.cs
Step 5: Create a scheduled job that will run the sitemap generator service
Step 6: Create a controller that will serve the sitemap
Step 7: Access the sitemap using its URL, for example https://localhost:5000/seo-sitemap.xml
Home.cs
namespace CustomSitemap.Web.Pages
{
    /// <summary>
    /// Home page content type. In addition to the regular page data it carries the
    /// file name under which the SEO sitemap of the site is requested.
    /// </summary>
    [ContentType(DisplayName = "Home Page", GroupName = "Corporate", GUID = "8F0896B1-E3F2-4E8C-81AC-4FE52C519402")]
    public class HomePage : PageData
    {
        /// <summary>
        /// Editor-configurable sitemap file name, for example <c>site-sitemap.xml</c>.
        /// Shown on the Settings tab of the page.
        /// </summary>
        [Display(
            Name = "SEO sitemap.xml file name",
            // Help text shown to editors; spelling corrected ("meaning full" -> "meaningful").
            Description = "Provide a meaningful name for the SEO sitemap, like site-sitemap.xml",
            GroupName = SystemTabNames.Settings,
            Order = 550)]
        public virtual string SitemapFileName { get; set; }
    }
}
ISitemapGeneratorService.cs
namespace CustomSitemap.Web.Business.Sitemap
{
    /// <summary>
    /// Contract of the service that creates and removes the sitemap files of the sites.
    /// </summary>
    public interface ISitemapGeneratorService
    {
        /// <summary>
        /// Removes sitemap files that were generated earlier.
        /// </summary>
        void DeleteSitemaps();

        /// <summary>
        /// Generates the sitemap files for the sites.
        /// </summary>
        /// <returns><c>true</c> when generation succeeded; otherwise <c>false</c>.</returns>
        bool GenerateSitemaps();

        /// <summary>
        /// Returns the sites that sitemaps are generated for.
        /// </summary>
        /// <returns>The site definitions.</returns>
        IEnumerable<SiteDefinition> GetAllSites();
    }
}
SitemapGeneratorService.cs
using EPiServer.Filters;
using EPiServer.Web;
using EPiServer.Web.Routing;
using EPiServer.Logging;
using System.Globalization;
using CustomSitemap.Web.Business.Extensions;
namespace CustomSitemap.Web.Business.Sitemap
{
/// <summary>
/// Get the site names from configuration and generate sitemap with the sitename and save to wwwroot/sitemaps
/// </summary>
public class SitemapGeneratorService : ISitemapGeneratorService
{
// Content access: loading pages, descendants and language branches.
private readonly IContentLoader _contentLoader;
private readonly IContentRepository _contentRepository;

// Resolves the public URL of a page for a given language.
private readonly UrlResolver _urlResolver;

// Source of the site definitions a sitemap is generated for.
private readonly ISiteDefinitionRepository _siteDefinitionRepository;

// Stored for completeness; not read by the members visible in this class.
private readonly ILanguageBranchRepository _languageBranchRepository;

// Gives access to the physical wwwroot folder the sitemap files live in.
private readonly IWebHostEnvironment _webHostingEnvironment;

private readonly EPiServer.Logging.ILogger _logger;

/// <summary>
/// Creates the service from its dependencies and sets up the logger for this type.
/// </summary>
/// <param name="contentLoader">Loads pages and their descendants.</param>
/// <param name="contentRepository">Provides the language branches of the start page.</param>
/// <param name="urlResolver">Resolves page URLs.</param>
/// <param name="siteDefinitionRepository">Lists the configured sites.</param>
/// <param name="languageBranchRepository">Language branch repository (kept in a field).</param>
/// <param name="webHostingEnvironment">Hosting environment exposing the web root path.</param>
public SitemapGeneratorService(
    IContentLoader contentLoader,
    IContentRepository contentRepository,
    UrlResolver urlResolver,
    ISiteDefinitionRepository siteDefinitionRepository,
    ILanguageBranchRepository languageBranchRepository,
    IWebHostEnvironment webHostingEnvironment)
{
    _logger = LogManager.GetLogger(typeof(SitemapGeneratorService));

    _webHostingEnvironment = webHostingEnvironment;
    _languageBranchRepository = languageBranchRepository;
    _siteDefinitionRepository = siteDefinitionRepository;
    _urlResolver = urlResolver;
    _contentRepository = contentRepository;
    _contentLoader = contentLoader;
}
/// <summary>
/// Returns every site definition known to the site definition repository.
/// </summary>
/// <returns>The site definitions as listed by the repository.</returns>
public IEnumerable<SiteDefinition> GetAllSites() => _siteDefinitionRepository.List();
/// <summary>
/// Deletes every file matching <c>*sitemap.xml</c> located directly in <c>wwwroot/sitemaps</c>.
/// </summary>
/// <remarks>
/// Best effort: a file that cannot be deleted is logged and skipped. The method does nothing
/// when the web root is not configured or the <c>sitemaps</c> folder does not exist.
/// </remarks>
public void DeleteSitemaps()
{
    var wwwrootPath = _webHostingEnvironment.WebRootPath;
    if (string.IsNullOrEmpty(wwwrootPath))
    {
        return;
    }

    // Check the sitemaps folder itself (not just wwwroot): Directory.GetFiles throws
    // DirectoryNotFoundException when the folder has not been created yet.
    var sitemapsPath = Path.Combine(wwwrootPath, "sitemaps");
    if (!Directory.Exists(sitemapsPath))
    {
        return;
    }

    foreach (var file in Directory.GetFiles(sitemapsPath, "*sitemap.xml", SearchOption.TopDirectoryOnly))
    {
        try
        {
            File.Delete(file);
            // Routine housekeeping, so this is informational rather than a warning.
            _logger.Information($"Deleted: {file}");
        }
        catch (Exception ex)
        {
            // Keep going with the remaining files; pass the exception so the stack trace is logged.
            _logger.Error($"Error deleting file {file}: {ex.Message}", ex);
        }
    }
}
/// <summary>
/// Regenerates the sitemap file of every site and stores it as
/// <c>wwwroot/sitemaps/{site-name}-sitemap.xml</c>.
/// </summary>
/// <remarks>
/// Existing sitemap files are deleted first. A failure for one site is logged and does not
/// prevent the remaining sites from being processed.
/// </remarks>
/// <returns>
/// <c>true</c> when every site was written successfully; <c>false</c> when preparation failed
/// or at least one site could not be generated (details are written to the log).
/// </returns>
public bool GenerateSitemaps()
{
    try
    {
        // Delete already created sitemap files
        DeleteSitemaps();

        // File.WriteAllText does not create missing folders, so make sure the target exists.
        var sitemapsFolder = Path.Combine(_webHostingEnvironment.WebRootPath, "sitemaps");
        Directory.CreateDirectory(sitemapsFolder);

        var allSucceeded = true;
        foreach (var siteDefinition in GetAllSites())
        {
            try
            {
                var startPage = _contentLoader.Get<PageData>(siteDefinition.StartPage);

                // A site is not guaranteed to have a primary host; fall back to the site URL
                // instead of dereferencing null.
                var primaryHostUrl = siteDefinition.Hosts
                    .FirstOrDefault(host => host.Type == HostDefinitionType.Primary)?.Url
                    ?? siteDefinition.SiteUrl;
                if (primaryHostUrl == null)
                {
                    _logger.Warning($"Site '{siteDefinition.Name}' has no primary host or site URL configured");
                }

                var sitemapContent = GenerateSitemapForSite(startPage, primaryHostUrl);

                // The file name must be derived exactly like the consumer that serves the file
                // derives it (site name, spaces -> dashes, lower-cased).
                var sitemapFileName = siteDefinition.Name.Replace(" ", "-").ToLower() + "-sitemap.xml";
                File.WriteAllText(Path.Combine(sitemapsFolder, sitemapFileName), sitemapContent);
            }
            catch (Exception ex)
            {
                allSucceeded = false;
                _logger.Error($"Error generating sitemap for site '{siteDefinition.Name}': {ex.Message}", ex);
            }
        }

        return allSucceeded;
    }
    catch (Exception ex)
    {
        // Callers only get a boolean, so the log is the only place the reason is recorded.
        _logger.Error($"Error generating sitemaps: {ex.Message}", ex);
        return false;
    }
}
/// <summary>
/// Builds the sitemap XML for one site: one entry per language version of the start page and
/// one entry per eligible descendant page in that language.
/// </summary>
/// <param name="startPage">Start page of the site.</param>
/// <param name="primaryHostUrl">
/// Host used to turn site-relative URLs into absolute ones; may be <c>null</c>, in which case
/// URLs are emitted exactly as the URL resolver returns them.
/// </param>
/// <returns>The complete <c>urlset</c> document as a string.</returns>
private string GenerateSitemapForSite(PageData startPage, Uri primaryHostUrl)
{
    var publishedFilter = new FilterPublished();
    var accessFilter = new FilterAccess();
    var sitemapEntries = new List<string>();

    // Get the available languages for the start page of the site
    var pageLanguagesBranches = _contentRepository.GetLanguageBranches<PageData>(startPage.ContentLink).ToList();

    // The set of descendant links does not depend on the language, so resolve it once
    // instead of once per language branch.
    var descendantLinks = _contentLoader.GetDescendents(startPage.ContentLink).ToList();

    foreach (var availablePage in pageLanguagesBranches)
    {
        var culture = availablePage.Language;

        // Add home page to the sitemap
        AddSitemapEntry(sitemapEntries, availablePage, culture.Name, primaryHostUrl, "0.80");

        foreach (var pageLink in descendantLinks)
        {
            if (!_contentLoader.TryGet(pageLink, culture, out PageData page))
            {
                continue;
            }

            // Skip pages that are unpublished, not accessible, flagged for exclusion or shortcuts.
            if (publishedFilter.ShouldFilter(page)
                || accessFilter.ShouldFilter(page)
                || IsExcludedFromSitemap(page)
                || page.LinkType == PageShortcutType.Shortcut)
            {
                continue;
            }

            AddSitemapEntry(sitemapEntries, page, culture.Name, primaryHostUrl, "0.64");
        }
    }

    return $"<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\">{string.Join("", sitemapEntries)}</urlset>";
}

/// <summary>
/// Reads the <c>ExcludeFromSitemap</c> property of a page. Accepts a boolean value or its
/// string form; a missing or unparsable value means "not excluded".
/// </summary>
private static bool IsExcludedFromSitemap(PageData page)
{
    switch (page["ExcludeFromSitemap"])
    {
        case bool flag:
            return flag;
        case string text:
            // Use the parsed value; the return value of TryParse only says whether parsing worked.
            return bool.TryParse(text, out var parsed) && parsed;
        default:
            return false;
    }
}

/// <summary>
/// Appends a <c>url</c> element for the page to <paramref name="entries"/>; pages without a
/// resolvable URL are skipped.
/// </summary>
private void AddSitemapEntry(List<string> entries, PageData page, string languageName, Uri hostUrl, string priority)
{
    var url = _urlResolver.GetUrl(page.ContentLink, languageName);
    if (string.IsNullOrEmpty(url))
    {
        return;
    }

    // The sitemap protocol requires fully qualified URLs; prefix site-relative ones with the host.
    if (hostUrl != null
        && hostUrl.IsAbsoluteUri
        && url.StartsWith("/", StringComparison.Ordinal)
        && !url.StartsWith("//", StringComparison.Ordinal))
    {
        url = hostUrl.GetLeftPart(UriPartial.Authority) + url;
    }

    // lastmod is optional; leave it out when no publish date is available rather than
    // emitting year 0001. Invariant culture keeps the W3C format independent of server locale.
    var lastPublished = Convert.ToDateTime(page.GetLastPublishDate());
    var lastModElement = lastPublished == DateTime.MinValue
        ? string.Empty
        : $"<lastmod>{lastPublished.ToString("yyyy-MM-ddTHH:mm:sszzz", CultureInfo.InvariantCulture)}</lastmod>";

    // Escape &, <, >, " and ' so URLs with query strings do not produce malformed XML.
    var escapedUrl = System.Security.SecurityElement.Escape(url);

    entries.Add($"<url><loc>{escapedUrl}</loc>{lastModElement}<priority>{priority}</priority></url>");
}
}
}
Startup.cs
// Registers the sitemap generator (a new instance per resolution) followed by the CMS services.
public void ConfigureServices(IServiceCollection services)
{
    services
        .AddTransient<ISitemapGeneratorService, SitemapGeneratorService>()
        .AddCms();
}
SitemapGeneratorJob.cs
namespace CustomSitemap.Web.Business.ScheduledJobs
{
/// <summary>
/// Scheduled job for generating SEO sitemaps.
/// </summary>
[ScheduledPlugIn(DisplayName = "Generate SEO Sitemaps", Description = "Generates sitemap for each site", GUID = "5C3DDEC2-0CD5-4734-B8EE-FDA8BE08C46E")]
public class SitemapGeneratorJob : ScheduledJobBase
{
private readonly EPiServer.Logging.ILogger _logger;

// Service that performs the actual sitemap generation.
private ISitemapGeneratorService _sitemapGeneratorService;

// Used to find out which environment the application is running in.
private readonly IWebHostEnvironment _environment;

// Set by Stop().
private bool _stopSignaled;

/// <summary>
/// Creates the job and marks it as stoppable.
/// </summary>
/// <param name="sitemapGeneratorService">Service that generates the sitemap files.</param>
/// <param name="environment">Hosting environment of the application.</param>
public SitemapGeneratorJob(
    ISitemapGeneratorService sitemapGeneratorService,
    IWebHostEnvironment environment)
{
    IsStoppable = true;

    _environment = environment;
    _sitemapGeneratorService = sitemapGeneratorService;
    _logger = LogManager.GetLogger(typeof(SitemapGeneratorJob));
}
/// <summary>
/// Records that a stop of the job was requested.
/// </summary>
public override void Stop() => _stopSignaled = true;
/// <summary>
/// Runs the job: generates the sitemaps through the sitemap generator service, unless the
/// application runs in the development environment or a stop was requested beforehand.
/// </summary>
/// <returns>A message describing the outcome of the run.</returns>
public override string Execute()
{
    OnStatusChanged("Starting execution of SitemapGeneratorJob");
    _logger.Information("Starting the job : SitemapGeneratorJob");

    if (_environment.IsDevelopment())
    {
        _logger.Information($"Current environment is {_environment.EnvironmentName}");
        _logger.Warning($"This job is not intended to run on the environment {_environment.EnvironmentName}");
        return "SitemapGeneratorJob will not run in development environment";
    }

    // Honour a stop request before the work starts. Generation is a single call that cannot
    // be interrupted, so checking afterwards would report "stopped" for a run that finished.
    if (_stopSignaled)
    {
        return "SitemapGeneratorJob was stopped";
    }

    // Call the method to generate sitemaps
    var status = _sitemapGeneratorService.GenerateSitemaps();
    if (!status)
    {
        return "SitemapGeneratorJob did not run successfully. Please check the logs for more information";
    }

    return "SitemapGeneratorJob completed successfully";
}
}
}
SitemapController.cs
namespace CustomSitemap.Web.Controllers
{
/// <summary>
/// controller to get SEO sitemap or regular xml file.
/// </summary>
[Route("{sitemapName}.xml")]
public class SitemapController : Controller
{
// Locates the physical wwwroot folder.
private readonly IWebHostEnvironment _webHostEnvironment;

// Maps the request host name to a site definition.
private readonly ISiteDefinitionResolver _siteDefinitionResolver;

// Access to the current HTTP request.
private readonly IHttpContextAccessor _httpContextAccessor;

// Loads the start page that holds the configured sitemap file name.
private readonly IContentLoader _contentLoader;

/// <summary>
/// Creates the controller from its dependencies.
/// </summary>
/// <param name="webHostEnvironment">Hosting environment exposing the web root path.</param>
/// <param name="siteDefinitionResolver">Resolves a site from a host name.</param>
/// <param name="httpContextAccessor">Accessor for the current HTTP context.</param>
/// <param name="contentLoader">Content loader used to read the start page.</param>
public SitemapController(
    IWebHostEnvironment webHostEnvironment,
    ISiteDefinitionResolver siteDefinitionResolver,
    IHttpContextAccessor httpContextAccessor,
    IContentLoader contentLoader)
{
    _contentLoader = contentLoader;
    _httpContextAccessor = httpContextAccessor;
    _siteDefinitionResolver = siteDefinitionResolver;
    _webHostEnvironment = webHostEnvironment;
}
/// <summary>
/// Serves the generated sitemap of the current site, or another XML file from wwwroot when the
/// requested name is not the sitemap name.
/// </summary>
/// <remarks>
/// The request is treated as a sitemap request when the name equals the file name configured
/// on the start page, or equals <c>sitemap.xml</c> when nothing is configured.
/// </remarks>
/// <param name="sitemapName">File name taken from the URL, with or without the .xml extension.</param>
/// <returns>The XML content, or 404 when the name is invalid or the file does not exist.</returns>
[HttpGet]
public IActionResult Index(string sitemapName)
{
    // The name comes straight from the URL. Only a plain file name is acceptable: anything
    // carrying directory information (for example an encoded "..\") could otherwise escape
    // wwwroot when it is combined into a physical path below.
    if (string.IsNullOrWhiteSpace(sitemapName)
        || !string.Equals(Path.GetFileName(sitemapName), sitemapName, StringComparison.Ordinal))
    {
        return NotFound();
    }

    if (!sitemapName.EndsWith(".xml", StringComparison.OrdinalIgnoreCase))
    {
        sitemapName = $"{sitemapName}.xml";
    }

    // Read the sitemap file name from the home page
    var startPage = _contentLoader.Get<PageData>(ContentReference.StartPage);
    var sitemapFileName = startPage["SitemapFileName"] as string;

    var isSitemapRequest = string.IsNullOrEmpty(sitemapFileName)
        ? sitemapName.Equals("sitemap.xml", StringComparison.OrdinalIgnoreCase)
        : sitemapFileName.Equals(sitemapName, StringComparison.OrdinalIgnoreCase);

    if (!isSitemapRequest)
    {
        // Not the sitemap: try the requested file in wwwroot itself.
        return ServeXmlFile(Path.Combine(_webHostEnvironment.WebRootPath, sitemapName));
    }

    var httpContext = _httpContextAccessor.HttpContext;
    if (httpContext == null)
    {
        return NotFound();
    }

    // Resolve the site by host name first, then by host name including the port
    // (for example "localhost:5000").
    var currentSite = _siteDefinitionResolver.GetByHostname(httpContext.Request.Host.Host, true)
        ?? _siteDefinitionResolver.GetByHostname(httpContext.Request.Host.Value, true);
    if (currentSite == null)
    {
        // No site matches this host, so there is no sitemap to serve.
        return NotFound();
    }

    // Same naming scheme the generator uses when it writes the file
    // (site name, spaces -> dashes, lower-cased).
    var generatedFileName = currentSite.Name.Replace(" ", "-").ToLower() + "-sitemap.xml";
    return ServeXmlFile(Path.Combine(_webHostEnvironment.WebRootPath, "sitemaps", generatedFileName));
}

/// <summary>
/// Returns the content of the file as <c>application/xml</c>, or 404 when it does not exist.
/// </summary>
private IActionResult ServeXmlFile(string filePath)
{
    if (!System.IO.File.Exists(filePath))
    {
        return NotFound();
    }

    return Content(System.IO.File.ReadAllText(filePath), "application/xml");
}
}
}
What is step two? In a load-balanced or cloud environment that could scale out to more instances, you would probably not want to store the sitemap file on disk?
@Tomas Hendrud Gulla,
This is just a basic example to get started, for anyone who does not want to use third-party plugins and wants to implement their own.
Please feel free to provide suggestions or changes if needed.
Thanks
Sunil
Ok, the blog post is updated after my comment. It has more steps now.
Adding the cache can also be a good option
be careful with physical files in the skies.
any real reasons (except corporate blsh) not being able to use 3rd party library and spend time implementing, maintaining and improving (like feature request - to control which pages are included and which are not included) home-brewed sitemap generator?
Any suggestions on where to save the sitemaps?
within a block or page?