File: /home/nyiet8349bzl/www/oldwebsite.sbsc.in/wp-content/plugins/simply-static/src/class-ss-util.php
<?php
namespace Simply_Static;
// Exit if accessed directly
if ( ! defined( 'ABSPATH' ) ) {
exit;
}
/**
* Simply Static utility class
*/
class Util {
/**
* Return a filterable list of admin-only plugin directory slugs to always exclude
* from Enhanced Crawl "Plugins to Include" selections and auto-include logic.
*
* This centralizes the defaults and mirrors previous lists used in Admin REST and Plugin.
*
* Filter: `ss_admin_only_plugins` allows adding/removing slugs.
*
* @return string[] Array of sanitized, unique plugin directory slugs.
*/
public static function get_admin_only_plugins(): array {
$defaults = array(
'advanced-custom-fields',
'secure-custom-fields',
'query-monitor',
'debug-bar',
'health-check',
'user-switching',
'wp-crontrol',
'theme-check',
'regenerate-thumbnails',
'wp-migrate-db',
'wp-migrate-db-pro',
'wp-staging',
'wp-staging-pro',
'rollback',
'wp-rollback',
'classic-editor',
'artiss-transient-cleaner',
'updraftplus',
'user-switchting',
'view-admin-as',
'wp-beta-tester',
'wp-downgrade',
'wp-rest-cache',
'wp-reset',
'wpvidid-backuprestore',
'duplicate-post',
);
/**
* Filter the list of admin-only plugin slugs that should be excluded from Enhanced Crawl.
*
* @param string[] $defaults Directory slugs of admin-only plugins.
*/
$list = apply_filters( 'ss_admin_only_plugins', $defaults );
if ( ! is_array( $list ) ) {
$list = $defaults;
}
// Sanitize and de-duplicate.
$list = array_map( 'sanitize_title', array_filter( array_map( 'strval', $list ) ) );
return array_values( array_unique( $list ) );
}
/**
* Derive a plugin directory slug from a plugin path like akismet/akismet.php.
* Returns an empty string if it cannot be derived.
*
* @param string $plugin_path
* @return string
*/
public static function plugin_slug_from_path( string $plugin_path ): string {
$dir = trim( dirname( (string) $plugin_path ), '/' );
if ( $dir === '' || $dir === '.' ) {
return '';
}
return sanitize_title( $dir );
}
/**
* Append a plugin slug to Enhanced Crawl's "Plugins to Include" list and persist.
* De-duplicates and sanitizes slugs.
*
* @param string $slug Plugin directory slug
* @return void
*/
public static function add_plugin_to_enhanced_crawl( string $slug ): void {
$slug = sanitize_title( $slug );
$options = get_option( 'simply-static' );
if ( ! is_array( $options ) ) {
$options = array();
}
$current = array();
if ( isset( $options['plugins_to_include'] ) && is_array( $options['plugins_to_include'] ) ) {
$current = array_map( 'strval', $options['plugins_to_include'] );
}
if ( $slug !== '' ) {
$current[] = $slug;
}
$options['plugins_to_include'] = array_values( array_unique( array_map( 'sanitize_title', $current ) ) );
update_option( 'simply-static', $options );
}
/**
* Remove a plugin slug from Enhanced Crawl's "Plugins to Include" list and persist.
*
* @param string $slug Plugin directory slug
* @return void
*/
public static function remove_plugin_from_enhanced_crawl( string $slug ): void {
$options = get_option( 'simply-static' );
if ( ! is_array( $options ) ) {
$options = array();
}
$current = array();
if ( isset( $options['plugins_to_include'] ) && is_array( $options['plugins_to_include'] ) ) {
$current = array_map( 'strval', $options['plugins_to_include'] );
}
$slug = sanitize_title( (string) $slug );
$filtered = array();
foreach ( $current as $item ) {
if ( sanitize_title( $item ) !== $slug ) {
$filtered[] = $item;
}
}
$options['plugins_to_include'] = array_values( array_unique( array_map( 'sanitize_title', $filtered ) ) );
update_option( 'simply-static', $options );
}
/**
* Return the active theme slugs: child (stylesheet) and parent (template) if different.
*
* @return string[]
*/
public static function active_theme_slugs(): array {
$theme = function_exists( 'wp_get_theme' ) ? wp_get_theme() : null;
if ( ! $theme || ! $theme->exists() ) {
return array();
}
$slugs = array();
$stylesheet = $theme->get_stylesheet();
if ( ! empty( $stylesheet ) ) {
$slugs[] = sanitize_title( $stylesheet );
}
$template = $theme->get_template();
if ( ! empty( $template ) ) {
$slugs[] = sanitize_title( $template );
}
return array_values( array_unique( $slugs ) );
}
/**
* Append one or more theme slugs to Enhanced Crawl's "Themes to Include" list and persist.
*
* @param string[] $slugs
* @return void
*/
public static function add_themes_to_enhanced_crawl( array $slugs ): void {
$options = get_option( 'simply-static' );
if ( ! is_array( $options ) ) {
$options = array();
}
$current = array();
if ( isset( $options['themes_to_include'] ) && is_array( $options['themes_to_include'] ) ) {
$current = array_map( 'strval', $options['themes_to_include'] );
}
foreach ( $slugs as $slug ) {
$slug = sanitize_title( (string) $slug );
if ( $slug !== '' ) {
$current[] = $slug;
}
}
$options['themes_to_include'] = array_values( array_unique( array_map( 'sanitize_title', $current ) ) );
update_option( 'simply-static', $options );
}
/**
* Automatically remove a deactivated plugin from Enhanced Crawl's "Plugins to Include" list.
*
* Hook callback for `deactivated_plugin`.
*
* Filters used:
* - `ss_auto_remove_on_deactivation` (bool) Gate entire behavior (default true).
*
* @param string $plugin Relative plugin path like akismet/akismet.php
* @param bool $network_deactivating True if network-deactivated on multisite
* @return void
*/
public static function maybe_auto_remove_deactivated_plugin( $plugin, $network_deactivating ): void {
$enabled = apply_filters( 'ss_auto_remove_on_deactivation', true );
if ( ! $enabled ) {
return;
}
$slug = self::plugin_slug_from_path( (string) $plugin );
if ( '' === $slug ) {
return;
}
if ( is_multisite() && $network_deactivating ) {
$sites = function_exists( 'get_sites' ) ? get_sites( array( 'fields' => 'ids' ) ) : array();
if ( is_array( $sites ) ) {
foreach ( $sites as $blog_id ) {
switch_to_blog( (int) $blog_id );
self::remove_plugin_from_enhanced_crawl( $slug );
restore_current_blog();
}
}
return;
}
self::remove_plugin_from_enhanced_crawl( $slug );
}
/**
* Include the currently active theme (and parent, if child theme is used) into
* Enhanced Crawl's "Themes to Include" list whenever the theme is switched.
*
* Hook callback for `after_switch_theme`.
*
* Filters used:
* - `ss_auto_include_themes_on_switch` (bool) Control if this runs (default true).
* - `ss_auto_include_skip_themes` (string[]) Theme slugs to skip.
*
* @param mixed ...$args Ignored. Present for compatibility with WP action parameters.
* @return void
*/
public static function maybe_auto_include_active_theme( ...$args ): void {
$enabled = apply_filters( 'ss_auto_include_themes_on_switch', true );
if ( ! $enabled ) {
return;
}
$slugs = self::active_theme_slugs();
$skip = apply_filters( 'ss_auto_include_skip_themes', array() );
if ( ! is_array( $skip ) ) {
$skip = array();
}
$skip = array_map( 'sanitize_title', array_filter( array_map( 'strval', $skip ) ) );
$slugs = array_values( array_diff( $slugs, $skip ) );
if ( empty( $slugs ) ) {
return;
}
self::add_themes_to_enhanced_crawl( $slugs );
}
/**
* Automatically add an activated plugin to Enhanced Crawl's "Plugins to Include" list.
*
* Hook callback for `activated_plugin`. Kept here to avoid bloating Plugin class.
*
* Filters used:
* - `ss_auto_include_on_activation` (bool) Gate entire behavior (default true).
* - `ss_auto_include_skip_plugins` (string[]) Additional plugin slugs to skip.
* - `ss_admin_only_plugins` (string[]) Centralized admin-only list.
* - `ss_auto_include_self_slugs` (string[]) Defaults to ['simply-static','simply-static-pro'].
* - `ss_auto_include_debug` (bool) Enable error_log debug lines.
*
* @param string $plugin Relative plugin path like akismet/akismet.php
* @param bool $network_wide True if network-activated on multisite
* @return void
*/
public static function maybe_auto_include_activated_plugin( $plugin, $network_wide ): void {
// Allow disabling globally.
$enabled = apply_filters( 'ss_auto_include_on_activation', true );
if ( ! $enabled ) {
return;
}
// Derive the plugin directory slug.
$slug = self::plugin_slug_from_path( (string) $plugin );
if ( '' === $slug ) {
return;
}
$slug_lc = strtolower( $slug );
// Exclusion lists: admin-only, custom, and self slugs.
$admin_only_list = self::get_admin_only_plugins();
$admin_only_lc = array_map( 'strtolower', array_map( 'strval', (array) $admin_only_list ) );
$custom_skips = apply_filters( 'ss_auto_include_skip_plugins', array() );
if ( ! is_array( $custom_skips ) ) {
$custom_skips = array();
}
$custom_skips_lc = array_map( 'strtolower', array_map( 'sanitize_title', array_filter( array_map( 'strval', $custom_skips ) ) ) );
$self_defaults = array( 'simply-static', 'simply-static-pro' );
$self_slugs = apply_filters( 'ss_auto_include_self_slugs', $self_defaults );
if ( ! is_array( $self_slugs ) ) {
$self_slugs = $self_defaults;
}
$self_slugs_lc = array_map( 'strtolower', array_map( 'sanitize_title', array_filter( array_map( 'strval', $self_slugs ) ) ) );
$skip_lc = array_values( array_unique( array_merge( $admin_only_lc, $custom_skips_lc, $self_slugs_lc ) ) );
if ( in_array( $slug_lc, $skip_lc, true ) ) {
if ( apply_filters( 'ss_auto_include_debug', false ) ) {
error_log( sprintf( '[Simply Static] Auto-include skipped for plugin "%s" (in skip list).', $slug ) );
}
return;
}
if ( is_multisite() && $network_wide ) {
$sites = function_exists( 'get_sites' ) ? get_sites( array( 'fields' => 'ids' ) ) : array();
if ( is_array( $sites ) ) {
foreach ( $sites as $blog_id ) {
switch_to_blog( (int) $blog_id );
self::add_plugin_to_enhanced_crawl( $slug );
restore_current_blog();
}
}
if ( apply_filters( 'ss_auto_include_debug', false ) ) {
error_log( sprintf( '[Simply Static] Auto-included plugin "%s" for %d site(s) via network activation.', $slug, is_array( $sites ) ? count( $sites ) : 0 ) );
}
return;
}
// Single site or per-site activation.
self::add_plugin_to_enhanced_crawl( $slug );
if ( apply_filters( 'ss_auto_include_debug', false ) ) {
error_log( sprintf( '[Simply Static] Auto-included plugin "%s" on this site.', $slug ) );
}
}
/**
* Parse a list of user-provided lines into literals and regex patterns.
* Lines wrapped like /pattern/flags are treated as regex; others as literals.
*
* @param array $lines
* @return array{literals: string[], regex: string[]}
*/
public static function parse_patterns( array $lines ): array {
$literals = [];
$regex = [];
foreach ( $lines as $line ) {
$line = trim( (string) $line );
if ( $line === '' ) { continue; }
// Regex if starts and ends with /, allow optional flags like i,m,s,u
if ( strlen( $line ) >= 2 && $line[0] === '/' && strrpos( $line, '/' ) !== 0 ) {
$last = strrpos( $line, '/' );
$pattern = substr( $line, 0, $last + 1 );
$flags = substr( $line, $last + 1 );
// Validate pattern
$valid = @preg_match( $pattern . $flags, '' );
if ( $valid !== false ) {
$regex[] = $pattern . $flags;
continue;
}
}
$literals[] = $line;
}
return [ 'literals' => array_values( array_unique( $literals ) ), 'regex' => array_values( array_unique( $regex ) ) ];
}
/**
* Build a candidate URL list used when resolving regex in Additional URLs.
* This is a best-effort set: home/front page, all public posts, term links, author links,
* and common archive/pagination URLs. Filterable and capped by limits.
*
* @return string[]
*/
public static function candidate_urls_for_regex(): array {
$limit = (int) apply_filters( 'ss_regex_candidate_url_limit', 5000 );
$urls = [];
$urls[] = home_url( '/' );
if ( 'page' === get_option( 'show_on_front' ) ) {
$front_id = (int) get_option( 'page_on_front' );
if ( $front_id ) {
$u = get_permalink( $front_id ); if ( is_string( $u ) ) { $urls[] = $u; }
}
}
// Posts and pages of public types
$post_types = get_post_types( [ 'public' => true ], 'names' );
unset( $post_types['attachment'] );
$post_types = apply_filters( 'simply_static_post_types_to_crawl', $post_types );
$q = [ 'post_type' => $post_types, 'post_status' => 'publish', 'posts_per_page' => -1, 'fields' => 'ids' ];
$ids = get_posts( $q );
foreach ( (array) $ids as $pid ) {
$u = get_permalink( $pid ); if ( is_string( $u ) ) { $urls[] = $u; }
if ( count( $urls ) >= $limit ) { break; }
}
if ( count( $urls ) < $limit ) {
// Terms
$taxes = get_taxonomies( [ 'public' => true ], 'names' );
foreach ( $taxes as $tx ) {
$terms = get_terms( [ 'taxonomy' => $tx, 'hide_empty' => true ] );
if ( is_wp_error( $terms ) ) { continue; }
foreach ( $terms as $term ) {
$u = get_term_link( $term ); if ( ! is_wp_error( $u ) ) { $urls[] = $u; }
if ( count( $urls ) >= $limit ) { break 2; }
}
}
}
if ( count( $urls ) < $limit ) {
// Authors
$users = get_users( [ 'who' => 'authors' ] );
foreach ( (array) $users as $user ) {
$u = get_author_posts_url( $user->ID ); if ( is_string( $u ) ) { $urls[] = $u; }
if ( count( $urls ) >= $limit ) { break; }
}
}
return array_values( array_unique( $urls ) );
}
/**
* Determine if a URL should be excluded based on settings and patterns.
* Centralized helper used by crawlers and fetch tasks.
*
* @param string $url
* @return bool
*/
public static function is_url_excluded( string $url ): bool {
// Never exclude core WordPress or Simply Static assets
if ( self::is_core_include_asset( $url ) ) {
return false;
}
$excluded = array( '.php' );
$opts = Options::instance();
// Exclude debug files (.log, .txt) but not robots.txt, llms.txt, _redirects, or _headers
if ( preg_match( '/\.(log|txt)$/i', $url ) && strpos( $url, 'debug' ) !== false && strpos( $url, 'robots.txt' ) === false && strpos( $url, 'llms.txt' ) === false && strpos( $url, '_redirects' ) === false && strpos( $url, '_headers' ) === false ) {
return true;
}
// Exclude feeds if add_feeds is not true.
if ( ! $opts->get( 'add_feeds' ) ) {
// Only exclude WordPress feed-style URLs
if ( preg_match( '/(\/feed\/?$|\?feed=|\/feed\/|\/rss\/?$|\/atom\/?$)/i', $url ) ) {
return true;
}
}
// Exclude Rest API if add_rest_api is not true.
if ( ! $opts->get( 'add_rest_api' ) ) {
$excluded[] = 'wp-json';
}
$urls_to_exclude = $opts->get( 'urls_to_exclude' );
$regex_patterns = [];
if ( ! empty( $urls_to_exclude ) ) {
if ( is_array( $urls_to_exclude ) ) {
$excluded_by_option = wp_list_pluck( $urls_to_exclude, 'url' );
} else {
$excluded_by_option = explode( "\n", $urls_to_exclude );
}
if ( is_array( $excluded_by_option ) ) {
// Normalize: trim whitespace/CRLF, drop empties, unique
$excluded_by_option = array_filter( array_map( 'trim', $excluded_by_option ), function ( $v ) {
return $v !== '';
} );
$excluded_by_option = array_unique( $excluded_by_option );
$parsed = self::parse_patterns( $excluded_by_option );
$excluded = array_merge( $excluded, $parsed['literals'] );
$regex_patterns = $parsed['regex'];
}
}
if ( apply_filters( 'simply_static_exclude_temp_dir', true ) ) {
$excluded[] = self::get_temp_dir_url();
}
$excluded = apply_filters( 'ss_excluded_by_default', $excluded );
if ( $excluded ) {
$excluded = array_filter( $excluded );
}
// First test regex patterns if provided
foreach ( (array) $regex_patterns as $pattern ) {
if ( @preg_match( $pattern, $url ) ) {
if ( preg_match( $pattern, $url ) ) { return true; }
}
}
// Then test literal contains (case-insensitive)
if ( ! empty( $excluded ) ) {
foreach ( $excluded as $excludable ) {
if ( stripos( $url, $excludable ) !== false ) {
return true;
}
}
}
return false;
}
/**
* Get all active plugins for the current site, including network-activated plugins on multisite.
*
* Returns a list of plugin basenames (e.g. akismet/akismet.php).
*
* @return array
*/
public static function get_all_active_plugins(): array {
$active = (array) get_option( 'active_plugins', [] );
if ( function_exists( 'is_multisite' ) && is_multisite() ) {
// Network-activated plugins are stored as an associative array with plugin file as the key.
$network = (array) get_site_option( 'active_sitewide_plugins', [] );
$network_plugins = array_keys( $network );
$active = array_merge( $active, $network_plugins );
}
$active = array_values( array_unique( $active ) );
sort( $active );
return $active;
}
/**
* Compute the target Static Site URL based on Simply Static settings.
* Returns an empty string if it cannot be determined.
*
* Logic:
* - destination_url_type = 'relative' and relative_path not empty:
* Use the current site's scheme (https if wp_is_using_https() or is_ssl()),
* then build home_url( '/', $scheme ) + relative_path.
* - destination_url_type = 'absolute' with non-empty destination_scheme and destination_host:
* Normalize and return scheme://host.
*
* @return string The static site URL or empty string when unavailable.
*/
public static function get_static_site_url() {
$options = get_option( 'simply-static' );
if ( empty( $options ) || ! is_array( $options ) ) {
return '';
}
$type = isset( $options['destination_url_type'] ) ? strtolower( trim( $options['destination_url_type'] ) ) : '';
$target_url = '';
if ( 'relative' === $type ) {
$relative_path = isset( $options['relative_path'] ) ? trim( $options['relative_path'] ) : '';
if ( $relative_path !== '' ) {
$scheme = ( function_exists( 'wp_is_using_https' ) && wp_is_using_https() ) ? 'https' : ( is_ssl() ? 'https' : 'http' );
$base_url = home_url( '/', $scheme );
$target_url = trailingslashit( $base_url ) . ltrim( $relative_path, '/' );
}
} elseif ( 'absolute' === $type ) {
$scheme = isset( $options['destination_scheme'] ) ? trim( $options['destination_scheme'] ) : '';
$host = isset( $options['destination_host'] ) ? trim( $options['destination_host'] ) : '';
if ( $scheme !== '' && $host !== '' ) {
$scheme = preg_replace( '/:\\/*$/', '', $scheme );
$host = preg_replace( '/^\\/*/', '', $host );
$target_url = $scheme . '://' . $host;
}
}
return $target_url;
}
/**
* Get the protocol used for the origin URL
* @return string http or https
*/
public static function origin_scheme() {
$pattern = '/:\/\/.*/';
return preg_replace( $pattern, '', self::origin_url() );
}
/**
* Get the host for the origin URL
* @return string host (URL minus the protocol)
*/
public static function origin_host() {
return untrailingslashit( self::strip_protocol_from_url( self::origin_url() ) );
}
/**
* Wrapper around home_url(). Useful for swapping out the URL during debugging.
* @return string home URL
*/
public static function origin_url() {
$options = Options::instance();
if ( $options->get( 'origin_url' ) ) {
return apply_filters( 'ss_origin_url', esc_url( untrailingslashit( $options->get( 'origin_url' ) ) ) );
}
return apply_filters( 'ss_origin_url', untrailingslashit( home_url() ) );
}
/**
* Strip UTF-8 BOM from a string
*
* @param string $string String to strip BOM from
* @return string
*/
public static function strip_bom( $string ) {
if ( ! is_string( $string ) ) {
return $string;
}
return preg_replace( '/^\xEF\xBB\xBF/', '', $string );
}
/**
* Truncate if a string exceeds a certain length (30 chars by default)
* @return string
*/
public static function truncate( $string, $length = 30, $omission = '...' ) {
return ( strlen( $string ) > $length + 3 ) ? ( substr( $string, 0, $length ) . $omission ) : $string;
}
/**
* Dump an object to error_log
*
* @param mixed $object Object to dump to the error log
*
* @return void
*/
public static function error_log( $object = null ) {
$contents = self::get_contents_from_object( $object );
error_log( $contents );
}
/**
* Clear the debug log
* @return void
*/
public static function clear_debug_log() {
$debug_file = self::get_debug_log_filename();
if ( file_exists( $debug_file ) ) {
// Clear file
file_put_contents( $debug_file, '' );
}
}
/**
* Save an object/string to the debug log
*
* @param mixed $object Object to save to the debug log
*
* @return void
*/
public static function debug_log( $object = null ) {
$options = Options::instance();
if ( ! $options->get( 'debugging_mode' ) ) {
return;
}
$debug_file = self::get_debug_log_filename();
if ( ! file_exists( $debug_file ) ) {
wp_mkdir_p( dirname( $debug_file ) );
}
// add timestamp and newline
$message = '[' . date( 'Y-m-d H:i:s' ) . '] ';
$trace = debug_backtrace();
if ( isset( $trace[0]['file'] ) ) {
$file = basename( $trace[0]['file'] );
if ( isset( $trace[0]['line'] ) ) {
$file .= ':' . $trace[0]['line'];
}
$message .= '[' . $file . '] ';
}
$contents = self::get_contents_from_object( $object );
// get message onto a single line
$contents = preg_replace( "/\r|\n/", "", $contents );
$message .= $contents . "\n";
// log the message to the debug file instead of the usual error_log location
error_log( $message, 3, $debug_file );
}
/**
* Return the filename for the debug log
* @return string Filename for the debug log
*/
public static function get_debug_log_filename() {
// Get directories.
$uploads_dir = wp_upload_dir();
$simply_static_dir = $uploads_dir['basedir'] . DIRECTORY_SEPARATOR . 'simply-static' . DIRECTORY_SEPARATOR;
// Set name for debug file.
$options = get_option( 'simply-static' );
if ( isset( $options['encryption_key'] ) ) {
return apply_filters( 'ss_debug_log_file', $simply_static_dir . $options['encryption_key'] . '-debug.txt', $options['encryption_key'] );
} else {
return apply_filters( 'ss_debug_log_file', $simply_static_dir . 'debug.txt', '' );
}
}
/**
* Get contents of an object as a string
*
* @param mixed $object Object to get string for
*
* @return string String containing the contents of the object
*/
protected static function get_contents_from_object( $object ) {
// Handle common scalar types early and safely
if ( is_string( $object ) ) {
// Prevent huge memory usage by truncating very large strings
return self::truncate( $object, 5000 );
}
if ( is_null( $object ) ) {
return 'NULL';
}
if ( is_bool( $object ) ) {
return $object ? 'TRUE' : 'FALSE';
}
if ( is_int( $object ) || is_float( $object ) ) {
return (string) $object;
}
if ( is_resource( $object ) ) {
return 'resource(' . get_resource_type( $object ) . ')';
}
// For arrays/objects, avoid var_dump which can explode memory usage.
// Prefer JSON with partial output on error; fall back to print_r.
$max_length = apply_filters( 'simply_static_debug_max_length', 100000 ); // 100 KB by default
$json_opts = defined( 'JSON_PARTIAL_OUTPUT_ON_ERROR' ) ? JSON_PARTIAL_OUTPUT_ON_ERROR : 0;
$encoded = function_exists( 'wp_json_encode' ) ? wp_json_encode( $object, $json_opts, 5 ) : json_encode( $object, $json_opts, 5 );
if ( $encoded === false || $encoded === null ) {
$encoded = print_r( $object, true );
}
if ( strlen( $encoded ) > $max_length ) {
$encoded = substr( $encoded, 0, $max_length ) . '... [truncated]';
}
return $encoded;
}
public static function is_valid_scheme( $scheme ) {
$valid_schemes = apply_filters( 'simply_static_valid_schemes', [
'http',
'https',
] );
return in_array( $scheme, $valid_schemes );
}
/**
* Given a URL extracted from a page, return an absolute URL
*
* Takes a URL (e.g. /test) extracted from a page (e.g. http://foo.com/bar/) and
* returns an absolute URL (e.g. http://foo.com/bar/test). Absolute URLs are
* returned as-is. Exception: links beginning with a # (hash) are left as-is.
*
* A null value is returned in the event that the extracted_url is blank or it's
* unable to be parsed.
*
* @param string $extracted_url Relative or absolute URL extracted from page
* @param string $page_url URL of page
*
* @return string|null Absolute URL, or null
*/
public static function relative_to_absolute_url( $extracted_url, $page_url ) {
// we can't do anything with null or blank urls
if ( $extracted_url === null ) {
return null;
}
$extracted_url = trim( $extracted_url );
// we can't do anything with blank urls
if ( $extracted_url === '' ) {
return null;
}
// if we get a hash, e.g. href='#section-three', just return it as-is
if ( strpos( $extracted_url, '#' ) === 0 ) {
return $extracted_url;
}
// check for a protocol-less URL
// (Note: there's a bug in PHP <= 5.4.7 where parsed URLs starting with //
// are treated as a path. So we're doing this check upfront.)
// http://php.net/manual/en/function.parse-url.php#example-4617
if ( strpos( $extracted_url, '//' ) === 0 ) {
// if this is a local URL, add the protocol to the URL
if ( stripos( $extracted_url, '//' . self::origin_host() ) === 0 ) {
$extracted_url = self::origin_scheme() . ':' . $extracted_url;
}
return $extracted_url;
}
$parsed_extracted_url = parse_url( $extracted_url );
// parse_url can sometimes return false; bail if it does
if ( $parsed_extracted_url === false ) {
return null;
}
// if no path, check for an ending slash; if there isn't one, add one
if ( ! isset( $parsed_extracted_url['path'] ) ) {
if ( isset( $parsed_extracted_url['scheme'] ) && ! self::is_valid_scheme( $parsed_extracted_url['scheme'] ) ) {
return $extracted_url;
}
$clean_url = self::remove_params_and_fragment( $extracted_url );
$fragment = substr( $extracted_url, strlen( $clean_url ) );
$extracted_url = trailingslashit( $clean_url ) . $fragment;
}
if ( isset( $parsed_extracted_url['host'] ) ) {
return $extracted_url;
} elseif ( isset( $parsed_extracted_url['scheme'] ) ) {
// examples of schemes without hosts: java:, data:
return $extracted_url;
} else { // no host on extracted page (might be relative url)
$path = isset( $parsed_extracted_url['path'] ) ? $parsed_extracted_url['path'] : '';
$query = isset( $parsed_extracted_url['query'] ) ? '?' . $parsed_extracted_url['query'] : '';
$fragment = isset( $parsed_extracted_url['fragment'] ) ? '#' . $parsed_extracted_url['fragment'] : '';
// turn our relative url into an absolute url
$extracted_url = PhpUri::parse( $page_url )->join( $path . $query . $fragment );
return $extracted_url;
}
}
/**
* Recursively create a path from one page to another
*
* Takes a path (e.g. /blog/foobar/) extracted from a page (e.g. /blog/page/3/)
* and returns a path to get to the extracted page from the current page
* (e.g. ./../../foobar/index.html). Since this is for offline use, the path
* return will include a /index.html if the extracted path doesn't contain
* an extension.
*
* The function recursively calls itself, cutting off sections of the page path
* until the base matches the extracted path or it runs out of parts to remove,
* then it builds out the path to the extracted page.
*
* @param string $extracted_path Relative or absolute URL extracted from page.
* @param string $page_path URL of page.
* @param int $iterations Number of times the page path has been chopped.
*
* @return string|null Absolute URL, or null
*/
public static function create_offline_path( $extracted_path, $page_path, $iterations = 0 ) {
if ( $iterations === 0 ) {
// Remove index.html if it's there
$page_path = self::strip_index_filenames_from_url( $page_path );
// If it's not the root, and it doesn't have a trailing slash,
// it's likely a file (like r080108-1.html).
// We want to chop off the file part for depth calculation.
if ( $page_path !== '/' && substr( $page_path, - 1 ) !== '/' ) {
$page_path = dirname( $page_path );
}
$page_path = trailingslashit( $page_path );
}
if ( $page_path === '/' || strpos( $extracted_path, $page_path ) === 0 ) {
$extracted_path = substr( $extracted_path, strlen( $page_path ) === 1 ? 0 : strlen( $page_path ) );
$new_path = '.' . str_repeat( '/..', $iterations ) . self::add_leading_slash( $extracted_path );
return $new_path;
} else {
// match everything before the last slash
$pattern = '/(.*)\/[^\/]*\/$/';
// remove the last slash and anything after it
$new_page_path = preg_replace( $pattern, '$1/', $page_path );
return self::create_offline_path( $extracted_path, $new_page_path, ++ $iterations );
}
}
/**
* Check if URL starts with same URL as WordPress installation
*
* Both http and https are assumed to be the same domain.
*
* @param string $url URL to check
*
* @return boolean true if URL is local, false otherwise
*/
public static function is_local_url( $url ) {
$url_host = self::strip_protocol_from_url( self::remove_params_and_fragment( $url ) );
// Strip port if present
$url_host = preg_replace( '/:\d+/', '', $url_host );
$origin_host = self::origin_host();
// Strip port if present
$origin_host = preg_replace( '/:\d+/', '', $origin_host );
return apply_filters( 'ss_is_local_url', ( stripos( $url_host, $origin_host ) === 0 ) );
}
/**
* Check if WP-Cron is running.
*
* @return bool
*/
public static function is_cron(): bool {
// Return false only when WP-Cron is explicitly disabled and no SS_CRON override is set.
$wp_cron_disabled = ( defined( 'DISABLE_WP_CRON' ) && ( true === constant( 'DISABLE_WP_CRON' ) ) );
if ( $wp_cron_disabled && ! defined( 'SS_CRON' ) ) {
return false;
}
return true;
}
/**
* Get the path from a local URL, removing the protocol and host
*
* @param string $url URL to strip protocol/host from
*
* @return string URL sans protocol/host
*/
public static function get_path_from_local_url( $url ) {
// Keep behavior robust: only remove the leading scheme/host (and optional base path),
// never replace occurrences inside filenames or deeper path segments.
if ( ! is_string( $url ) ) {
return $url;
}
// Remove scheme to work with a canonical form like: host[:port]/base/path...
$no_scheme = self::strip_protocol_from_url( $url );
// Determine origin components reliably (host and optional base path for subdirectory installs)
$origin_parts = function_exists( 'wp_parse_url' ) ? wp_parse_url( self::origin_url() ) : parse_url( self::origin_url() );
if ( ! is_array( $origin_parts ) ) {
$origin_parts = array();
}
$origin_host = isset( $origin_parts['host'] ) ? $origin_parts['host'] : '';
$origin_path = isset( $origin_parts['path'] ) ? untrailingslashit( $origin_parts['path'] ) : '';
// Build an anchored pattern that matches only the leading host[:port] and optional base path.
if ( $origin_host !== '' ) {
$host_pattern = '^' . preg_quote( $origin_host, '/' ) . '(?::\d+)?';
$base_pattern = $origin_path !== '' ? preg_quote( $origin_path, '/' ) : '';
$pattern = '/' . $host_pattern . $base_pattern . '/';
$no_host = preg_replace( $pattern, '', $no_scheme, 1 );
} else {
// Fallback: origin_host() may include subdirectory; strip only once at the start to avoid touching filenames.
$fallback = self::origin_host();
$pattern = '/^' . preg_quote( $fallback, '/' ) . '/';
$no_host = preg_replace( $pattern, '', $no_scheme, 1 );
}
// Ensure a single leading slash for a clean local path, preserving any query/fragment later.
return '/' . ltrim( $no_host, '/' );
}
/**
* Returns a URL w/o the query string or fragment (i.e. nothing after the path)
*
* @param string $url URL to remove query string/fragment from
*
* @return string URL without query string/fragment
*/
public static function remove_params_and_fragment( $url ) {
return preg_replace( '/(\?|#).*/', '', $url );
}
/**
* Converts a textarea into an array w/ each line being an entry in the array
*
* @param string $textarea Textarea to convert
*
* @return array Converted array
*/
public static function string_to_array( $textarea ) {
if ( ! is_string( $textarea ) ) {
return array();
}
// using preg_split to intelligently break at newlines
// see: http://stackoverflow.com/questions/1483497/how-to-put-string-in-array-split-by-new-line
$lines = preg_split( "/\r\n|\n|\r/", $textarea );
array_walk( $lines, 'trim' );
$lines = array_filter( $lines );
return $lines;
}
/**
* Remove the //, http://, https:// protocols from a URL
*
* @param string $url URL to remove protocol from
*
* @return string URL sans http/https protocol
*/
public static function strip_protocol_from_url( $url ) {
$pattern = '/^(https?:)?\/\//';
return preg_replace( $pattern, '', $url );
}
/**
* Remove index.html/index.php from a URL
*
* @param string $url URL to remove index file from
*
* @return string URL sans index file
*/
public static function strip_index_filenames_from_url( $url ) {
$pattern = '/index.(html?|php)$/';
return preg_replace( $pattern, '', $url );
}
/**
* Get the current datetime formatted as a string for entry into MySQL
* @return string MySQL formatted datetime
*/
public static function formatted_datetime() {
return current_time( 'Y-m-d H:i:s' );
}
/**
* Sanitize filename to remove problematic Unicode characters
*
* @param string $filename Filename to sanitize.
* @return string Sanitized filename.
*/
public static function sanitize_filename( $filename ) {
if ( $filename === '__qs' ) return $filename;
/**
* Filter whether to preserve original filenames without sanitization.
*
* When true, spaces and special characters in filenames are preserved
* instead of being converted to hyphens. This is useful for sites with
* large existing static structures that rely on original filenames.
*
* @since 3.6.0.1
*
* @param bool $preserve Whether to preserve original filenames. Default false.
*/
if ( apply_filters( 'ss_preserve_original_filenames', false ) ) {
return $filename;
}
// Bypass for safe ASCII (alphanumerics, hyphens, underscores, dots)
if ( preg_match( '/^[a-zA-Z0-9\-_.]+$/', $filename ) && substr( $filename, -1 ) !== '.' ) {
return $filename;
}
$filename = html_entity_decode( $filename, ENT_QUOTES, 'UTF-8' );
if ( function_exists( 'remove_accents' ) ) {
$filename = remove_accents( $filename );
}
// Remove bullet points, ellipses, copyright, and private use characters
$filename = preg_replace( '/[\x{2022}\x{2026}\x{00A9}\x{E000}-\x{F8FF}]/u', '-', (string) $filename );
if ( is_null( $filename ) ) {
$filename = '';
}
return sanitize_file_name( $filename );
}
/**
* Sanitize each segment of a path.
*
* @param string $path Path to sanitize.
* @return string Sanitized path.
*/
public static function sanitize_path( $path ) {
$segments = explode( '/', $path );
$sanitized_segments = array_map( [ self::class, 'sanitize_filename' ], $segments );
return implode( '/', $sanitized_segments );
}
/**
* Sanitize a local path (sans host) while preserving query and fragment.
*
* @param string $path The path to sanitize.
* @return string Sanitized path with original query/fragment.
*/
public static function sanitize_local_path( $path ) {
$clean_path = self::remove_params_and_fragment( $path );
$query_fragment = substr( $path, strlen( $clean_path ) );
return self::sanitize_path( urldecode( $clean_path ) ) . $query_fragment;
}
/**
* Similar to PHP's pathinfo(), but designed with URL paths in mind (instead of directories)
*
* Example:
* $info = self::url_path_info( '/manual/en/function.pathinfo.php?test=true' );
* $info['dirname'] === '/manual/en/'
* $info['basename'] === 'function.pathinfo.php'
* $info['extension'] === 'php'
* $info['filename'] === 'function.pathinfo'
*
* @param string $path The URL path
*
* @return array Array containing info on the parts of the path
*/
public static function url_path_info( $path ) {
$info = array(
'dirname' => '',
'basename' => '',
'filename' => '',
'extension' => ''
);
$path = self::remove_params_and_fragment( $path );
// everything after the last slash is the filename
$last_slash_location = strrpos( $path, '/' );
if ( $last_slash_location === false ) {
$info['basename'] = $path;
} else {
$info['dirname'] = substr( $path, 0, $last_slash_location + 1 );
$info['basename'] = substr( $path, $last_slash_location + 1 );
}
// finding the dot for the extension
$last_dot_location = strrpos( $info['basename'], '.' );
if ( $last_dot_location === false ) {
$info['filename'] = $info['basename'];
} else {
$info['filename'] = substr( $info['basename'], 0, $last_dot_location );
$info['extension'] = substr( $info['basename'], $last_dot_location + 1 );
}
// substr sets false if it fails, we're going to reset those values to ''
foreach ( $info as $name => $value ) {
if ( $value === false ) {
$info[ $name ] = '';
}
}
return $info;
}
public static function is_local_asset_url( $url ) {
if ( ! self::is_local_url( $url ) ) {
return false;
}
$allowed_asset_extensions = apply_filters( 'simply_static_allowed_local_asset_extensions', [
// Images
'webp', 'gif', 'jpg', 'jpeg', 'png', 'svg', 'ico', 'cur',
// Media
'mp4', 'webm', 'ogg', 'ogv', 'mp3', 'wav',
// Data/Docs
'json', 'xml', 'csv', 'pdf', 'txt',
// Web assets
'js', 'css',
// Fonts
'woff2', 'woff', 'ttf', 'eot', 'otf'
] );
$path_info = self::url_path_info( $url );
if ( empty( $path_info['extension'] ) ) {
return false;
}
return in_array( $path_info['extension'], $allowed_asset_extensions, true );
}
/**
* Check if a URL belongs to core WordPress or Simply Static assets that should never be excluded.
*
* @param string $url The URL to check.
* @return bool True if it's a core include asset.
*/
public static function is_core_include_asset( string $url ): bool {
$core_paths = [ includes_url(), plugins_url() . '/simply-static/', plugins_url() . '/simply-static-pro/' ];
foreach ( $core_paths as $path ) {
if ( strpos( $url, $path ) !== false ) return self::is_local_asset_url( $url );
}
return false;
}
/**
* Ensure there is a single trailing directory separator on the path
*
* @param string $path File path to add trailing directory separator to
*/
public static function add_trailing_directory_separator( $path ) {
return self::remove_trailing_directory_separator( $path ) . DIRECTORY_SEPARATOR;
}
/**
* Remove all trailing directory separators
*
* @param string $path File path to remove trailing directory separators from
*/
public static function remove_trailing_directory_separator( $path ) {
return rtrim( $path, DIRECTORY_SEPARATOR );
}
/**
* Remove all leading directory separators
*
* @param string $path File path to remove leading directory separators from
*/
public static function remove_leading_directory_separator( $path ) {
if ( $path === null ) {
return '';
}
return ltrim( $path, DIRECTORY_SEPARATOR );
}
/**
* Add a slash to the beginning of a path
*
* @param string $path URL path to add leading slash to
*/
public static function add_leading_slash( $path ) {
return '/' . self::remove_leading_slash( $path );
}
/**
* Remove a slash from the beginning of a path
*
* @param string $path URL path to remove leading slash from
*/
public static function remove_leading_slash( $path ) {
if ( $path === null ) {
return '';
}
return ltrim( $path, '/' );
}
/**
* Add a message to the array of status messages for the job
*
* @param array $messages Array of messages to add the message to
* @param string $task_name Name of the task
* @param string $message Message to display about the status of the job
* @param boolean $unique If unique, the task_name/key will get a prefix if the same exists.
*
* @return array
*/
public static function add_archive_status_message( $messages, $task_name, $message, $unique = false ) {
if ( ! is_array( $messages ) ) {
$messages = array();
}
// if the state exists, set the datetime and message
if ( ! array_key_exists( $task_name, $messages ) || $unique ) {
if ( $unique ) {
$task_name = $task_name . '_' . uniqid();
}
$messages[ $task_name ] = array(
'message' => $message,
'datetime' => self::formatted_datetime()
);
} else { // otherwise just update the message
$messages[ $task_name ]['message'] = $message;
}
return $messages;
}
/**
* Get full URL from path.
*
* @param string $path given path.
*
* @return string
*/
public static function abs_path_to_url( $path = '' ) {
$normalized_path = wp_normalize_path( $path );
// Check if the path is within WP_CONTENT_DIR
if ( defined( 'WP_CONTENT_DIR' ) && defined( 'WP_CONTENT_URL' ) ) {
$normalized_content_dir = wp_normalize_path( untrailingslashit( WP_CONTENT_DIR ) );
// If the path starts with the content directory, use WP_CONTENT_URL for replacement
if ( strpos( $normalized_path, $normalized_content_dir ) === 0 ) {
$url = str_replace(
$normalized_content_dir,
untrailingslashit( WP_CONTENT_URL ),
$normalized_path
);
return esc_url_raw( $url );
}
}
// Default behavior for paths not in WP_CONTENT_DIR
$url = str_replace(
wp_normalize_path( untrailingslashit( ABSPATH ) ),
site_url(),
$normalized_path
);
return esc_url_raw( $url );
}
/**
* Combine multiple paths into a single path while handling varying slashes and trailing slashes
*
* @param string ...$paths Each path to combine. You can pass as many paths as you want.
*
* @return string The combined path
*/
public static function combine_path(): string {
$paths = func_get_args();
if ( count( $paths ) < 1 ) {
return '';
}
$paths = array_map( fn( $path ) => self::normalize_slashes( $path ), $paths );
// We don't strip the slashes from the first path because on Linux, paths start with a slash.
$trimmed_path = array_map( fn( $path ) => trim( trim( $path ), '/' ), array_slice( $paths, 1 ) );
array_unshift( $trimmed_path, untrailingslashit( $paths[0] ) );
return implode( '/', $trimmed_path );
}
/**
* Normalize slashes in a path to forward slashes
*
* @param string $path The path to normalize.
*
* @return string The normalized path.
*/
public static function normalize_slashes( string $path ): string {
return strpos( $path, '\\' ) !== false ? str_replace( '\\', '/', $path ) : $path;
}
/**
* Build a safe relative path from an absolute path and its base directory.
* Ensures forward slashes and a leading slash for consistent URL building.
*
* @param string $base_dir The base directory (prefix) of the absolute path.
* @param string $absolute_path The absolute path to the file.
* @return string The normalized relative path starting with '/'.
*/
public static function safe_relative_path( string $base_dir, string $absolute_path ): string {
$dir_norm = rtrim( $base_dir, DIRECTORY_SEPARATOR );
$rel = substr( $absolute_path, strlen( $dir_norm ) );
if ( $rel === false ) {
$rel = str_replace( $base_dir, '', $absolute_path );
}
$rel = str_replace( '\\', '/', $rel );
if ( $rel === '' || $rel[0] !== '/' ) {
$rel = '/' . ltrim( $rel, '/' );
}
return $rel;
}
/**
* Join a base URL and a relative path with exactly one slash.
*
* @param string $base_url Base URL (may end with or without a slash).
* @param string $relative_path Relative path (may start with or without a slash).
* @return string The joined URL.
*/
public static function safe_join_url( string $base_url, string $relative_path ): string {
return rtrim( $base_url, '/' ) . '/' . ltrim( $relative_path, '/' );
}
/**
* Returns the global $wp_filesystem with credentials set.
* Returns null in case of any errors.
*
* @return \WP_Filesystem_Base|null
*/
public static function get_file_system() {
global $wp_filesystem;
$success = true;
// Initialize the file system if it has not been done yet.
if ( ! $wp_filesystem ) {
require_once ABSPATH . '/wp-admin/includes/file.php';
$constants = array(
'hostname' => 'FTP_HOST',
'username' => 'FTP_USER',
'password' => 'FTP_PASS',
'public_key' => 'FTP_PUBKEY',
'private_key' => 'FTP_PRIKEY',
);
$credentials = array();
// We provide credentials based on wp-config.php constants.
// Reference https://developer.wordpress.org/apis/wp-config-php/#wordpress-upgrade-constants
foreach ( $constants as $key => $constant ) {
if ( defined( $constant ) ) {
$credentials[ $key ] = constant( $constant );
}
}
$success = WP_Filesystem( $credentials );
}
if ( ! $success || $wp_filesystem->errors->has_errors() ) {
return null;
}
return $wp_filesystem;
}
/**
* Clear all transients used in Simply Static.
*
* @return void
*/
public static function clear_transients() {
// Diagnostics.
delete_transient( 'simply_static_checks' );
delete_transient( 'simply_static_failed_tests' );
// Tasks.
$tasks = [
'fetch_urls',
'search',
'minify',
'optimize_directories',
'shortpixel',
'shortpixel_download',
'aws_empty',
'create_zip_archive',
'transfer_files_locally',
'github_blobs',
'github_commit',
'bunny_deploy',
'tiiny_deploy',
'aws_deploy',
'sftp_deploy',
];
foreach ( $tasks as $task ) {
delete_option( 'simply_static_' . $task . '_total_pages' );
}
}
public static function get_temp_dir_url() {
$dir = self::get_temp_dir();
return self::abs_path_to_url( $dir );
}
/*
* Get the absolute path to the temporary file directory.
*
*/
public static function get_temp_dir() {
$options = get_option( 'simply-static' );
// Preferred base temp directory from settings if provided and safe
if ( ! empty( $options['temp_files_dir'] ) ) {
$temp_dir = $options['temp_files_dir'];
// If a stream wrapper path is provided by a plugin (e.g., Infinite Uploads), avoid using it for local temp work.
if ( function_exists( 'wp_is_stream' ) && wp_is_stream( $temp_dir ) ) {
$temp_dir = '';
}
} else {
$temp_dir = '';
}
// Fallback to uploads dir if not set by option
if ( $temp_dir === '' ) {
$upload_dir = wp_upload_dir();
$basedir = isset( $upload_dir['basedir'] ) ? $upload_dir['basedir'] : '';
// Guard against stream wrappers like iu:// from offload plugins
if ( $basedir && ( ! function_exists( 'wp_is_stream' ) || ! wp_is_stream( $basedir ) ) ) {
$temp_dir = $basedir . DIRECTORY_SEPARATOR . 'simply-static' . DIRECTORY_SEPARATOR . 'temp-files';
}
}
// Final fallback to a guaranteed local path under WP_CONTENT_DIR or system temp
if ( $temp_dir === '' || ( function_exists( 'wp_is_stream' ) && wp_is_stream( $temp_dir ) ) ) {
if ( defined( 'WP_CONTENT_DIR' ) && WP_CONTENT_DIR ) {
$temp_dir = rtrim( WP_CONTENT_DIR, DIRECTORY_SEPARATOR ) . DIRECTORY_SEPARATOR . 'simply-static' . DIRECTORY_SEPARATOR . 'temp-files';
} else {
$temp_dir = rtrim( sys_get_temp_dir(), DIRECTORY_SEPARATOR ) . DIRECTORY_SEPARATOR . 'simply-static' . DIRECTORY_SEPARATOR . 'temp-files';
}
}
// Ensure directory exists
if ( ! is_dir( $temp_dir ) ) {
wp_mkdir_p( $temp_dir );
}
return trailingslashit( $temp_dir );
}
/**
* Recursively delete contents of a directory but keep the directory itself.
*
* Rules:
* - No error suppression operators (@). We perform checks before FS calls to avoid warnings.
* - Very defensive: do nothing for empty/non-dirs and for very shallow paths.
*
* @param string $dir Absolute path to the directory whose contents should be cleared.
* @return void
*/
public static function delete_dir_contents( string $dir ): void {
$dir = (string) $dir;
if ( $dir === '' || ! is_dir( $dir ) ) {
return;
}
$normalized = str_replace( '\\', '/', $dir );
// Safety guard: do not operate on very shallow paths (like root-level). Require at least 3 path segments.
if ( substr_count( trim( $normalized, '/' ), '/' ) < 2 ) {
return;
}
$items = scandir( $dir );
if ( $items === false ) {
return;
}
foreach ( $items as $item ) {
if ( $item === '.' || $item === '..' ) {
continue;
}
$path = $dir . DIRECTORY_SEPARATOR . $item;
if ( is_dir( $path ) && ! is_link( $path ) ) {
self::delete_dir_contents( $path );
// Remove the now-empty directory if possible.
if ( is_dir( $path ) && is_writable( $path ) ) {
rmdir( $path );
}
} else {
// Files or links
if ( ( is_file( $path ) || is_link( $path ) ) && ( file_exists( $path ) || is_link( $path ) ) ) {
if ( is_writable( $path ) ) {
unlink( $path );
}
}
}
}
}
/**
* Normalize a URL to handle URL-encoded characters in the path.
*
* This function addresses issues with posts that have URL-encoded values
* in their post_name (e.g., "bedo%cc%88" instead of "bedö"). WordPress's
* get_permalink() may double-encode these values, causing the URLs to fail
* during static export.
*
* The function ONLY processes URLs that have double-encoded characters
* (containing %25 which is an encoded %). Other URLs are returned unchanged
* to avoid corrupting properly formatted URLs.
*
* @param string $url The URL to normalize.
* @return string The normalized URL.
*/
public static function normalize_url( $url ) {
if ( empty( $url ) || ! is_string( $url ) ) {
return $url;
}
$parsed = parse_url( $url );
if ( $parsed === false || ! isset( $parsed['path'] ) ) {
return $url;
}
// Only process URLs that have double-encoded characters (%25 is an encoded %)
// This prevents corrupting URLs that are already correctly formatted
if ( strpos( $parsed['path'], '%25' ) === false ) {
return $url;
}
// Decode the path to handle any double-encoding
// We decode twice to handle cases where % was encoded as %25
$decoded_path = urldecode( urldecode( $parsed['path'] ) );
// Re-encode the path properly, but preserve slashes
$path_segments = explode( '/', $decoded_path );
$encoded_segments = array_map( function( $segment ) {
// Use rawurlencode but restore characters that are valid in URL paths
// and commonly used in filenames (commas, tildes, etc.)
$encoded = rawurlencode( $segment );
// Restore safe characters that rawurlencode encodes but are valid in paths
// RFC 3986 sub-delims: !$&'()*+,;= and unreserved: -._~
$encoded = str_replace(
array( '%2C', '%7E', '%21', '%27', '%28', '%29', '%2A', '%40' ),
array( ',', '~', '!', "'", '(', ')', '*', '@' ),
$encoded
);
return $encoded;
}, $path_segments );
$normalized_path = implode( '/', $encoded_segments );
// Rebuild the URL
$normalized_url = '';
if ( isset( $parsed['scheme'] ) ) {
$normalized_url .= $parsed['scheme'] . '://';
}
if ( isset( $parsed['host'] ) ) {
$normalized_url .= $parsed['host'];
}
if ( isset( $parsed['port'] ) ) {
$normalized_url .= ':' . $parsed['port'];
}
$normalized_url .= $normalized_path;
if ( isset( $parsed['query'] ) ) {
$normalized_url .= '?' . $parsed['query'];
}
if ( isset( $parsed['fragment'] ) ) {
$normalized_url .= '#' . $parsed['fragment'];
}
return $normalized_url;
}
}