Class Utils
-
Nested Class Summary
Nested Classes -
Constructor Summary
Constructors -
Method Summary
All members are static. Full signatures and descriptions appear under Method Details below; the methods fall broadly into the following groups:
- String and unit conversions: bytesToString, byteStringAsBytes, byteStringAsKb, byteStringAsMb, byteStringAsGb, memoryStringToMb, megabytesToString, timeStringAsMs, timeStringAsSeconds, msDurationToString, stringHalfWidth, weakIntern, stringToSeq, stackTraceToString
- File and directory utilities: createDirectory, createTempDir, deleteRecursively, doesDirectoryContainAnyNewFiles, fetchFile, doFetchFile, unpack, unzipFilesFromFile, getFileLength, offsetBytes, tempFileWith, recursiveList, symlink, isInDirectory, chmod700, getLocalDir, getConfiguredLocalDirs
- Stream and serialization helpers: copyStream, copyStreamUpTo, copyFileStreamNIO, serialize, deserialize, serializeViaNestedStream, deserializeViaNestedStream, deserializeLongValue, clone, writeByteBuffer
- Process and command execution: executeCommand, executeAndGetOutput, processStreamByLine, terminateProcess, getStderr, checkCommandAvailable, getProcessName
- Error handling and resource management: tryOrExit, tryOrStopSparkContext, tryLog, tryLogNonFatalError, tryOrIOException, tryWithResource, tryInitializeResource, tryWithSafeFinally, tryWithSafeFinallyAndFailureCallbacks, doTryWithCallerStacktrace, getTryWithCallerStacktrace, logUncaughtExceptions, createResourceUninterruptiblyIfInTaskThread, isFatalError, exceptionString
- Networking and services: startServiceOnPort, userPort, portMaxRetries, isBindCollision, localHostName, localCanonicalHostName, localHostNameForURI, setCustomHostname, checkHost, checkHostPort, parseHostPort, extractHostPortFromSparkUrl, parseStandaloneMasterUrls, responseFromBackup, BACKUP_STANDALONE_MASTER_PREFIX, preferIPv6, validateURL, getUriBuilder, isLocalUri
- Configuration and environment: getSparkOrYarnConfig, loadDefaultSparkProperties, getPropertiesFromFile, getDefaultPropertiesFile, getSystemProperties, sparkJavaOpts, getUserJars, getLocalUserJarsForShell, isDynamicAllocationEnabled, isStreamingDynamicAllocationEnabled, getDynamicAllocationInitialExecutors, executorOffHeapMemorySizeAsMb, checkOffHeapEnabled, isClientMode, isLocalMaster, isPushBasedShuffleEnabled, checkAndGetK8sMasterUrl, DEFAULT_DRIVER_MEM_MB
- Platform and runtime checks: isWindows, isMac, isMacOnAppleSilicon, isJavaVersionAtLeast21, isG1GC, isTesting, isInRunningSparkTask, windowsDrive, libraryPathEnvName, libraryPathEnvPrefix
- Logging and diagnostics: setLogLevel, setLogLevelIfNeeded, getLogLevel, resetStructuredLogging, initDaemon, getThreadDump, getThreadDumpForThread, getHeapHistogram, getCallSite, withDummyCallSite, getUsedTimeNs, timeTakenMs, timeIt, times, createFailedToGetTokenMessage
- Security and redaction: redact, redactCommandLineArgs, createSecret, getCurrentUserName, getCurrentUserGroups, EMPTY_USER_GROUPS
- Class loading and reflection: classForName, classIsLoadable, classIsLoadableAndAssignableFrom, getContextOrSparkClassLoader, getSparkClassLoader, withContextClassLoader, loadExtensions, instantiateSerializerFromConf, instantiateSerializerOrShuffleManager, getSimpleName, getFormattedClassName, stripDollars
- URI and path handling: encodeFileNameToURIRawPath, encodeRelativeUnixPathToURIRawPath, decodeFileNameInURI, resolveURI, resolveURIs, nonLocalPaths, LOCAL_SCHEME, nameForAppAndAttempt, sanitizeDirName
- Collections and miscellaneous: randomize, randomizeInPlace, random, nonNegativeMod, nonNegativeHash, median, getIteratorSize, getIteratorZipWithIndex, cloneProperties, substituteAppId, substituteAppNExecIds, buildLocationMetadata
-
Constructor Details
-
Utils
public Utils()
-
-
Method Details
-
random
public static java.util.Random random()
-
DEFAULT_DRIVER_MEM_MB
public static int DEFAULT_DRIVER_MEM_MB() Define a default value for driver memory here, since this value is referenced across the code base and nearly all files already use Utils.scala.- Returns:
- (undocumented)
-
MAX_DIR_CREATION_ATTEMPTS
public static int MAX_DIR_CREATION_ATTEMPTS() -
LOCAL_SCHEME
Scheme used for files that are locally available on worker nodes in the cluster. -
deserializeLongValue
public static long deserializeLongValue(byte[] bytes) Deserialize a Long value (used for PythonPartitioner) -
serializeViaNestedStream
public static void serializeViaNestedStream(OutputStream os, SerializerInstance ser, scala.Function1<SerializationStream, scala.runtime.BoxedUnit> f) Serialize via nested stream using specific serializer -
deserializeViaNestedStream
public static void deserializeViaNestedStream(InputStream is, SerializerInstance ser, scala.Function1<DeserializationStream, scala.runtime.BoxedUnit> f) Deserialize via nested stream using specific serializer -
weakIntern
String interning to reduce the memory usage. -
withContextClassLoader
Run a segment of code using a different context class loader in the current thread- Parameters:
ctxClassLoader- (undocumented)fn- (undocumented)- Returns:
- (undocumented)
-
writeByteBuffer
Primitive often used when writing ByteBuffer to DataOutput.- Parameters:
bb- (undocumented)out- (undocumented)
-
writeByteBuffer
Primitive often used when writing ByteBuffer to OutputStream.- Parameters:
bb- (undocumented)out- (undocumented)
-
chmod700
JDK equivalent of chmod 700 file.- Parameters:
file- the file whose permissions will be modified- Returns:
- true if the permissions were successfully changed, false otherwise.
-
createTempDir
Create a temporary directory inside the given parent directory. The directory will be automatically deleted when the VM shuts down.- Parameters:
root- (undocumented)namePrefix- (undocumented)- Returns:
- (undocumented)
-
copyStreamUpTo
Copy the first maxSize bytes of data from the InputStream to an in-memory buffer, primarily to check for corruption. This returns a new InputStream which contains the same data as the original input stream. It may be entirely an in-memory buffer, or it may be a combination of in-memory data followed by continued reads from the original stream. The only real use of this is if the original input stream will potentially detect corruption while the data is being read (e.g. from compression). This allows for an eager corruption check of the first maxSize bytes of data.
- Parameters:
in- (undocumented)maxSize- (undocumented)- Returns:
- An InputStream which includes all data from the original stream (combining buffered data and remaining data in the original stream)
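For illustration, a minimal Scala sketch of how this pre-buffering might be used (the payload, the variable names and the 1024-byte limit are hypothetical; Utils is assumed to be importable from org.apache.spark.util):

  import java.io.ByteArrayInputStream
  import org.apache.spark.util.Utils

  // Hypothetical payload; in practice this would be a compressed block read from disk or the network.
  val payload: Array[Byte] = Array.fill(2048)(42.toByte)
  val original = new ByteArrayInputStream(payload)

  // Eagerly buffer (and thereby validate) the first 1024 bytes; the returned stream
  // still yields all 2048 bytes, combining the buffer with the rest of the original stream.
  val checked = Utils.copyStreamUpTo(original, 1024L)
  val all = checked.readAllBytes()   // same content as payload
  checked.close()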
-
encodeFileNameToURIRawPath
A file name may contain some invalid URI characters, such as " ". This method will convert the file name to a raw path accepted by java.net.URI(String). Note: the file name must not contain "/" or "\"
- Parameters:
fileName- (undocumented)- Returns:
- (undocumented)
-
encodeRelativeUnixPathToURIRawPath
Same as encodeFileNameToURIRawPath(java.lang.String) but returns the relative UNIX path.- Parameters:
path- (undocumented)- Returns:
- (undocumented)
-
decodeFileNameInURI
Get the file name from uri's raw path and decode it. If the raw path of uri ends with "/", return the name before the last "/".- Parameters:
uri- (undocumented)- Returns:
- (undocumented)
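A short Scala sketch of the encode/decode round trip (the file name and URI are hypothetical; the expected values in the comments follow from the descriptions above rather than from a documented contract):

  import java.net.URI
  import org.apache.spark.util.Utils

  // " " is not a valid raw-path character, so encode the name before building the URI.
  val raw = Utils.encodeFileNameToURIRawPath("my app file.jar")   // expected: "my%20app%20file.jar"
  val uri = new URI("file:///tmp/" + raw)

  // Recover the original, decoded file name from the URI's raw path.
  val name = Utils.decodeFileNameInURI(uri)                       // expected: "my app file.jar"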
-
fetchFile
public static File fetchFile(String url, File targetDir, SparkConf conf, org.apache.hadoop.conf.Configuration hadoopConf, long timestamp, boolean useCache, boolean shouldUntar) Download a file or directory to the target directory. Supports fetching the file in a variety of ways, including HTTP, Hadoop-compatible filesystems, and files on a standard filesystem, based on the URL parameter. Fetching directories is only supported from Hadoop-compatible filesystems.
If useCache is true, first attempts to fetch the file to a local cache that is shared across executors running the same application. useCache is used mainly for the executors, not in local mode. Throws SparkException if the target file already exists and has different contents than the requested file.
If shouldUntar is true, the given URL is untarred into targetDir if it is a tar.gz or tgz archive. This is legacy behavior; users should prefer the spark.archives configuration or SparkContext.addArchive instead.- Parameters:
url- (undocumented)targetDir- (undocumented)conf- (undocumented)hadoopConf- (undocumented)timestamp- (undocumented)useCache- (undocumented)shouldUntar- (undocumented)- Returns:
- (undocumented)
-
unpack
Unpacks an archive file into the specified directory. It expects .jar, .zip, .tar.gz, .tgz and .tar files. This behaves the same as Hadoop's archive handling in the distributed cache. This method is basically copied from org.apache.hadoop.yarn.util.FSDownload.unpack.- Parameters:
source- (undocumented)dest- (undocumented)
-
timeTakenMs
Records the duration of running `body`. -
doFetchFile
public static File doFetchFile(String url, File targetDir, String filename, SparkConf conf, org.apache.hadoop.conf.Configuration hadoopConf) Download a file or directory to the target directory. Supports fetching the file in a variety of ways, including HTTP, Hadoop-compatible filesystems, and files on a standard filesystem, based on the URL parameter. Fetching directories is only supported from Hadoop-compatible filesystems. Throws SparkException if the target file already exists and has different contents than the requested file.
- Parameters:
url- (undocumented)targetDir- (undocumented)filename- (undocumented)conf- (undocumented)hadoopConf- (undocumented)- Returns:
- (undocumented)
-
validateURL
Validate that a given URI is actually a valid URL as well.- Parameters:
uri- The URI to validate- Throws:
MalformedURLException
-
getLocalDir
Get the path of a temporary directory. Spark's local directories can be configured through multiple settings, which are used with the following precedence:
- If called from inside of a YARN container, this will return a directory chosen by YARN.
- If the SPARK_LOCAL_DIRS environment variable is set, this will return a directory from it.
- Otherwise, if spark.local.dir is set, this will return a directory from it.
- Otherwise, this will return java.io.tmpdir.
Some of these configuration options might be lists of multiple paths, but this method will always return a single directory. The returned directory is chosen randomly from the array of directories it gets from getOrCreateLocalRootDirs.
- Parameters:
conf- (undocumented)- Returns:
- (undocumented)
-
isInRunningSparkTask
public static boolean isInRunningSparkTask() Returns whether the current code is running in a Spark task, e.g., in an executor.- Returns:
- (undocumented)
-
getConfiguredLocalDirs
Return the configured local directories where Spark can write files. This method does not create any directories on its own, it only encapsulates the logic of locating the local directories according to deployment mode.- Parameters:
conf- (undocumented)- Returns:
- (undocumented)
-
randomize
public static <T> scala.collection.immutable.Seq<T> randomize(scala.collection.IterableOnce<T> seq, scala.reflect.ClassTag<T> evidence$1) Shuffle the elements of a collection into a random order, returning the result in a new collection. Unlike scala.util.Random.shuffle, this method uses a local random number generator, avoiding inter-thread contention.- Parameters:
seq- (undocumented)evidence$1- (undocumented)- Returns:
- (undocumented)
-
randomizeInPlace
Shuffle the elements of an array into a random order, modifying the original array. Returns the original array.- Parameters:
arr- (undocumented)rand- (undocumented)- Returns:
- (undocumented)
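A small Scala sketch contrasting the two shuffles (the values and the fixed seed are illustrative):

  import java.util.Random
  import org.apache.spark.util.Utils

  // Returns a new, shuffled collection; the input is left untouched.
  val shuffled: Seq[Int] = Utils.randomize(Seq(1, 2, 3, 4, 5))

  // Shuffles the array in place and returns the same array instance.
  val arr = Array("a", "b", "c", "d")
  Utils.randomizeInPlace(arr, new Random(42L))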
-
setCustomHostname
Allow setting a custom host name- Parameters:
hostname- (undocumented)
-
localCanonicalHostName
Get the local machine's FQDN.- Returns:
- (undocumented)
-
localHostName
Get the local machine's hostname. In case of IPv6, getHostAddress may return '0:0:0:0:0:0:0:1'.- Returns:
- (undocumented)
-
localHostNameForURI
Get the local machine's URI.- Returns:
- (undocumented)
-
checkHost
Checks that the host contains only a valid hostname or IP address, without a port. NOTE: an IPv6 address should be enclosed in [].- Parameters:
host- (undocumented)
-
checkHostPort
-
parseHostPort
-
getUsedTimeNs
Return a string describing how much time has passed since startTimeNs, in milliseconds.- Parameters:
startTimeNs- - a timestamp in nanoseconds returned by System.nanoTime.- Returns:
- (undocumented)
-
deleteRecursively
Delete a file or directory and its contents recursively. Don't follow directories if they are symlinks. Throws an exception if deletion is unsuccessful.- Parameters:
file- (undocumented)
-
doesDirectoryContainAnyNewFiles
Determines if a directory contains any files newer than cutoff seconds.- Parameters:
dir- must be the path to a directory, or IllegalArgumentException is throwncutoff- measured in seconds. Returns true if there are any files or directories in the given directory whose last modified time is later than this many seconds ago- Returns:
- (undocumented)
-
timeStringAsMs
Convert a time parameter such as (50s, 100ms, or 250us) to milliseconds for internal use. If no suffix is provided, the passed number is assumed to be in ms.- Parameters:
str- (undocumented)- Returns:
- (undocumented)
-
timeStringAsSeconds
Convert a time parameter such as (50s, 100ms, or 250us) to seconds for internal use. If no suffix is provided, the passed number is assumed to be in seconds.- Parameters:
str- (undocumented)- Returns:
- (undocumented)
-
byteStringAsBytes
Convert a passed byte string (e.g. 50b, 100k, or 250m) to bytes for internal use. If no suffix is provided, the passed number is assumed to be in bytes.
- Parameters:
str- (undocumented)- Returns:
- (undocumented)
-
byteStringAsKb
Convert a passed byte string (e.g. 50b, 100k, or 250m) to kibibytes for internal use. If no suffix is provided, the passed number is assumed to be in kibibytes.
- Parameters:
str- (undocumented)- Returns:
- (undocumented)
-
byteStringAsMb
Convert a passed byte string (e.g. 50b, 100k, or 250m) to mebibytes for internal use. If no suffix is provided, the passed number is assumed to be in mebibytes.
- Parameters:
str- (undocumented)- Returns:
- (undocumented)
-
byteStringAsGb
Convert a passed byte string (e.g. 50b, 100k, or 250m, 500g) to gibibytes for internal use. If no suffix is provided, the passed number is assumed to be in gibibytes.
- Parameters:
str- (undocumented)- Returns:
- (undocumented)
-
memoryStringToMb
Convert a Java memory parameter passed to -Xmx (such as 300m or 1g) to a number of mebibytes.- Parameters:
str- (undocumented)- Returns:
- (undocumented)
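A few illustrative conversions in Scala (the expected results in the comments are based on the descriptions above):

  import org.apache.spark.util.Utils

  // Time strings: with no suffix, timeStringAsMs assumes ms and timeStringAsSeconds assumes seconds.
  Utils.timeStringAsMs("50s")         // 50000
  Utils.timeStringAsSeconds("100ms")  // 0

  // Byte strings: each variant assumes its own unit when no suffix is given.
  Utils.byteStringAsBytes("1k")       // 1024
  Utils.byteStringAsMb("1g")          // 1024
  Utils.byteStringAsGb("2048m")       // 2

  // JVM-style memory sizes, as passed to -Xmx.
  Utils.memoryStringToMb("300m")      // 300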
-
bytesToString
Convert a quantity in bytes to a human-readable string such as "4.0 MiB".- Parameters:
size- (undocumented)- Returns:
- (undocumented)
-
bytesToString
-
msDurationToString
Returns a human-readable string representing a duration such as "35ms"- Parameters:
ms- (undocumented)- Returns:
- (undocumented)
-
megabytesToString
Convert a quantity in megabytes to a human-readable string such as "4.0 MiB".- Parameters:
megabytes- (undocumented)- Returns:
- (undocumented)
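And the formatting direction (the comments reflect the examples given in the descriptions above; the exact rendering may differ slightly):

  import org.apache.spark.util.Utils

  Utils.bytesToString(4L * 1024 * 1024)   // "4.0 MiB"
  Utils.megabytesToString(4)              // "4.0 MiB"
  Utils.msDurationToString(35)            // e.g. "35ms"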
-
executeCommand
public static Process executeCommand(scala.collection.immutable.Seq<String> command, File workingDir, scala.collection.Map<String, String> extraEnvironment, boolean redirectStderr) Execute a command and return the process running the command.- Parameters:
command- (undocumented)workingDir- (undocumented)extraEnvironment- (undocumented)redirectStderr- (undocumented)- Returns:
- (undocumented)
-
executeAndGetOutput
public static String executeAndGetOutput(scala.collection.immutable.Seq<String> command, File workingDir, scala.collection.Map<String, String> extraEnvironment, boolean redirectStderr) Execute a command and get its output, throwing an exception if it yields a code other than 0.- Parameters:
command- (undocumented)workingDir- (undocumented)extraEnvironment- (undocumented)redirectStderr- (undocumented)- Returns:
- (undocumented)
-
processStreamByLine
public static Thread processStreamByLine(String threadName, InputStream inputStream, scala.Function1<String, scala.runtime.BoxedUnit> processLine) Return and start a daemon thread that processes the content of the input stream line by line.- Parameters:
threadName- (undocumented)inputStream- (undocumented)processLine- (undocumented)- Returns:
- (undocumented)
-
tryOrExit
public static void tryOrExit(scala.Function0<scala.runtime.BoxedUnit> block) Execute a block of code that evaluates to Unit, forwarding any uncaught exceptions to the default UncaughtExceptionHandler. NOTE: This method is to be called by the spark-started JVM process.
- Parameters:
block- (undocumented)
-
tryOrStopSparkContext
public static void tryOrStopSparkContext(SparkContext sc, scala.Function0<scala.runtime.BoxedUnit> block) Execute a block of code that evaluates to Unit, stopping the SparkContext if there is any uncaught exception. NOTE: This method is to be called by driver-side components to avoid stopping the user-started JVM process completely; in contrast, tryOrExit is to be called in the spark-started JVM process.
- Parameters:
sc- (undocumented)block- (undocumented)
-
tryLogNonFatalError
public static void tryLogNonFatalError(scala.Function0<scala.runtime.BoxedUnit> block) Executes the given block. Log non-fatal errors if any, and only throw fatal errors -
tryWithSafeFinallyAndFailureCallbacks
public static <T> T tryWithSafeFinallyAndFailureCallbacks(scala.Function0<T> block, scala.Function0<scala.runtime.BoxedUnit> catchBlock, scala.Function0<scala.runtime.BoxedUnit> finallyBlock) Execute a block of code and call the failure callbacks in the catch block. If exceptions occur in either the catch or the finally block, they are appended to the list of suppressed exceptions in the original exception, which is then rethrown.
This is primarily an issue with catch { abort() } or finally { out.close() } blocks, where the abort/close needs to be called to clean up out, but if an exception happened in out.write, it is likely that out is corrupted and abort or out.close will fail as well. That would then suppress the original, likely more meaningful, exception from the original out.write call.- Parameters:
block- (undocumented)catchBlock- (undocumented)finallyBlock- (undocumented)- Returns:
- (undocumented)
-
TRY_WITH_CALLER_STACKTRACE_FULL_STACKTRACE
-
doTryWithCallerStacktrace
public static <T> scala.util.Try<T> doTryWithCallerStacktrace(scala.Function0<T> f) Use Try with stacktrace substitution for the caller retrieving the error. Normally in case of failure, the exception would have the stacktrace of the caller that originally called doTryWithCallerStacktrace. However, we want to replace the part above this function with the stacktrace of the caller who calls getTryWithCallerStacktrace. So here we save the part of the stacktrace below doTryWithCallerStacktrace, and getTryWithCallerStacktrace will stitch it with the new stack trace of the caller. The full original stack trace is kept in ex.getSuppressed.
- Parameters:
f- Code block to be wrapped in Try- Returns:
- Try with Success or Failure of the code block. Use with getTryWithCallerStacktrace.
-
getTryWithCallerStacktrace
public static <T> T getTryWithCallerStacktrace(scala.util.Try<T> t) Retrieve the result of Try that was created by doTryWithCallerStacktrace. In case of failure, the resulting exception has a stack trace that combines the stack trace below the original doTryWithCallerStacktrace which triggered it, with the caller stack trace of the current caller of getTryWithCallerStacktrace.
Full stack trace of the original doTryWithCallerStacktrace caller can be retrieved with
ex.getSuppressed.find { e => e.isInstanceOf[Utils.OriginalTryStackTraceException] }- Parameters:
t- Try from doTryWithCallerStacktrace- Returns:
- Result of the Try or rethrows the failure exception with modified stacktrace.
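A Scala sketch of how the two calls pair up (assuming the Scala API takes the block by name, as suggested by the Function0 parameter in the Java signature above; the exception is hypothetical):

  import scala.util.Try
  import org.apache.spark.util.Utils

  // Run the work where it actually happens and capture any failure, together with the
  // part of the stack trace below doTryWithCallerStacktrace.
  val result: Try[String] = Utils.doTryWithCallerStacktrace {
    throw new IllegalStateException("boom")   // hypothetical failure
  }

  // At the call site that consumes the result: rethrows the failure, with the upper part
  // of the stack trace replaced by this caller's stack trace.
  def consume(): String = Utils.getTryWithCallerStacktrace(result)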
-
getCallSite
When called inside a class in the spark package, returns the name of the user code class (outside the spark package) that called into Spark, as well as which Spark method they called. This is used, for example, to tell users where in their code each RDD got created.- Parameters:
skipClass- Function that is used to exclude non-user-code classes.- Returns:
- (undocumented)
-
getFileLength
Return the file length, if the file is compressed it returns the uncompressed file length. It also caches the uncompressed file size to avoid repeated decompression. The cache size is read from workerConf.- Parameters:
file- (undocumented)workConf- (undocumented)- Returns:
- (undocumented)
-
offsetBytes
Return a string containing part of a file from byte 'start' to 'end'. -
offsetBytes
public static String offsetBytes(scala.collection.immutable.Seq<File> files, scala.collection.immutable.Seq<Object> fileLengths, long start, long end) Return a string containing data across a set of files. The start and end offsets are based on the cumulative size of all the files, taken in the given order.- Parameters:
files- (undocumented)fileLengths- (undocumented)start- (undocumented)end- (undocumented)- Returns:
- (undocumented)
-
clone
public static <T> T clone(T value, SerializerInstance serializer, scala.reflect.ClassTag<T> evidence$2) Clone an object using a Spark serializer.- Parameters:
value- (undocumented)serializer- (undocumented)evidence$2- (undocumented)- Returns:
- (undocumented)
-
splitCommandString
Split a string of potentially quoted arguments from the command line the way that a shell would do it to determine arguments to a command. For example, if the string is 'a "b c" d', then it would be parsed as three arguments: 'a', 'b c' and 'd'.- Parameters:
s- (undocumented)- Returns:
- (undocumented)
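In Scala, mirroring the example in the description:

  import org.apache.spark.util.Utils

  // Quoted segments are kept together as single arguments.
  Utils.splitCommandString("""a "b c" d""")   // expected: Seq("a", "b c", "d")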
-
nonNegativeMod
public static int nonNegativeMod(int x, int mod) -
nonNegativeHash
-
getSystemProperties
Returns the system properties map that is thread-safe to iterate over. It gets the properties which have been set explicitly, as well as those for which only a default value has been defined.- Returns:
- (undocumented)
-
times
public static void times(int numIters, scala.Function0<scala.runtime.BoxedUnit> f) Method for repeating a task for its side effects. Unlike a for comprehension, it permits JVM JIT optimization.- Parameters:
numIters- (undocumented)f- (undocumented)
-
timeIt
public static long timeIt(int numIters, scala.Function0<scala.runtime.BoxedUnit> f, scala.Option<scala.Function0<scala.runtime.BoxedUnit>> prepare) Timing method based on iterations that permit JVM JIT optimization.- Parameters:
numIters- number of iterationsf- function to be executed. If prepare is not None, the running time of each call to f must be an order of magnitude longer than one nanosecond for accurate timing.prepare- function to be executed before each call to f. Its running time doesn't count.- Returns:
- the total time across all iterations (not counting preparation time) in nanoseconds.
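A small Scala sketch of both methods (assuming the Scala API takes the block in a second parameter list, which is how the Function0 parameters above are typically surfaced; the iteration count and the work done are illustrative):

  import org.apache.spark.util.Utils

  var sink = 0L

  // Run the block 1000 times; unlike a for comprehension this stays JIT-friendly.
  Utils.times(1000) { sink += 1 }

  // Total time in nanoseconds over 1000 iterations, with no per-iteration prepare step.
  val nanos: Long = Utils.timeIt(1000)({ sink += 1 }, None)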
-
getIteratorSize
Counts the number of elements of an iterator.- Parameters:
iterator- (undocumented)- Returns:
- (undocumented)
-
getIteratorZipWithIndex
public static <T> scala.collection.Iterator<scala.Tuple2<T,Object>> getIteratorZipWithIndex(scala.collection.Iterator<T> iter, long startIndex) Generate a zipWithIndex iterator, avoiding the index overflow problem of Scala's zipWithIndex.- Parameters:
iter- (undocumented)startIndex- (undocumented)- Returns:
- (undocumented)
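For example, in Scala (the iterators and the start index are illustrative):

  import org.apache.spark.util.Utils

  // Consumes the iterator and returns the element count as a Long.
  val n: Long = Utils.getIteratorSize(Iterator(1, 2, 3))            // 3

  // Like zipWithIndex, but with Long indices starting at an arbitrary offset,
  // so the index cannot overflow Int on very large inputs.
  val indexed = Utils.getIteratorZipWithIndex(Iterator("a", "b"), 10L)
  indexed.toList                                                    // List(("a", 10), ("b", 11))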
-
symlink
Creates a symlink.- Parameters:
src- absolute path to the sourcedst- relative path for the destination
-
getHadoopFileSystem
public static org.apache.hadoop.fs.FileSystem getHadoopFileSystem(URI path, org.apache.hadoop.conf.Configuration conf) Return a Hadoop FileSystem with the scheme encoded in the given path.- Parameters:
path- (undocumented)conf- (undocumented)- Returns:
- (undocumented)
-
getHadoopFileSystem
public static org.apache.hadoop.fs.FileSystem getHadoopFileSystem(String path, org.apache.hadoop.conf.Configuration conf) Return a Hadoop FileSystem with the scheme encoded in the given path.- Parameters:
path- (undocumented)conf- (undocumented)- Returns:
- (undocumented)
-
isWindows
public static boolean isWindows()Whether the underlying operating system is Windows.- Returns:
- (undocumented)
-
isMac
public static boolean isMac()Whether the underlying operating system is Mac OS X.- Returns:
- (undocumented)
-
isJavaVersionAtLeast21
public static boolean isJavaVersionAtLeast21()Whether the underlying Java version is at least 21.- Returns:
- (undocumented)
-
isMacOnAppleSilicon
public static boolean isMacOnAppleSilicon()Whether the underlying operating system is Mac OS X and processor is Apple Silicon.- Returns:
- (undocumented)
-
preferIPv6
public static boolean preferIPv6()Whether the underlying JVM prefer IPv6 addresses.- Returns:
- (undocumented)
-
windowsDrive
public static scala.util.matching.Regex windowsDrive()Pattern for matching a Windows drive, which contains only a single alphabet character.- Returns:
- (undocumented)
-
terminateProcess
Terminates a process waiting for at most the specified duration.- Parameters:
process- (undocumented)timeoutMs- (undocumented)- Returns:
- the process exit value if it was successfully terminated, else None
-
getStderr
Return the stderr of a process after waiting for the process to terminate. If the process does not terminate within the specified timeout, return None.- Parameters:
process- (undocumented)timeoutMs- (undocumented)- Returns:
- (undocumented)
-
logUncaughtExceptions
public static <T> T logUncaughtExceptions(scala.Function0<T> f) Execute the given block, logging and re-throwing any uncaught exception. This is particularly useful for wrapping code that runs in a thread, to ensure that exceptions are printed, and to avoid having to catch Throwable.- Parameters:
f- (undocumented)- Returns:
- (undocumented)
-
tryLog
public static <T> scala.util.Try<T> tryLog(scala.Function0<T> f) Executes the given block in a Try, logging any uncaught exceptions. -
isFatalError
Returns true if the given exception was fatal. See docs for scala.util.control.NonFatal. -
resolveURIs
Resolve a comma-separated list of paths. -
nonLocalPaths
Return all non-local paths from a comma-separated list of paths. -
loadDefaultSparkProperties
Load default Spark properties from the given file. If no file is provided, use the common defaults file. This mutates state in the given SparkConf and in this JVM's system properties if the config specified in the file is not already set. Return the path of the properties file used.- Parameters:
conf- (undocumented)filePath- (undocumented)- Returns:
- (undocumented)
-
getPropertiesFromFile
Load properties present in the given file. -
getDefaultPropertiesFile
Return the path of the default Spark properties file. -
exceptionString
Return a nice string representation of the exception. It will call "printStackTrace" to recursively generate the stack trace including the exception and its causes.- Parameters:
e- (undocumented)- Returns:
- (undocumented)
-
getThreadDump
Return a thread dump of all threads' stacktraces. Used to capture dumps for the web UI -
getHeapHistogram
Return a heap dump. Used to capture dumps for the web UI -
getThreadDumpForThread
-
sparkJavaOpts
public static scala.collection.immutable.Seq<String> sparkJavaOpts(SparkConf conf, scala.Function1<String, Object> filterKey) Convert all spark properties set in the given SparkConf to a sequence of java options.- Parameters:
conf- (undocumented)filterKey- (undocumented)- Returns:
- (undocumented)
-
portMaxRetries
Maximum number of retries when binding to a port before giving up.- Parameters:
conf- (undocumented)- Returns:
- (undocumented)
-
userPort
public static int userPort(int base, int offset) Returns the user port to try when trying to bind a service. Handles wrapping and skipping privileged ports.- Parameters:
base- (undocumented)offset- (undocumented)- Returns:
- (undocumented)
-
startServiceOnPort
public static <T> scala.Tuple2<T,Object> startServiceOnPort(int startPort, scala.Function1<Object, scala.Tuple2<T, Object>> startService, SparkConf conf, String serviceName) Attempt to start a service on the given port, or fail after a number of attempts. Use a shared configuration for the maximum number of port retries.- Parameters:
startPort- (undocumented)startService- (undocumented)conf- (undocumented)serviceName- (undocumented)- Returns:
- (undocumented)
-
startServiceOnPort
public static <T> scala.Tuple2<T,Object> startServiceOnPort(int startPort, scala.Function1<Object, scala.Tuple2<T, Object>> startService, int maxRetries, String serviceName) Attempt to start a service on the given port, or fail after a number of attempts. Each subsequent attempt uses 1 + the port used in the previous attempt (unless the port is 0).- Parameters:
startPort- The initial port to start the service on.startService- Function to start service on a given port. This is expected to throw java.net.BindException on port collision.maxRetries- The maximum number of retries when binding to a port.serviceName- Name of the service.- Returns:
- (service: T, port: Int)
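A Scala sketch using a plain ServerSocket as the service (the port numbers, retry count and service name are illustrative):

  import java.net.ServerSocket
  import org.apache.spark.util.Utils

  // The start function receives a candidate port and must return (service, boundPort);
  // it is expected to throw java.net.BindException on a collision, which triggers a retry
  // on the next port.
  def start(port: Int): (ServerSocket, Int) = {
    val socket = new ServerSocket(port)
    (socket, socket.getLocalPort)
  }

  val (server, boundPort) = Utils.startServiceOnPort(9999, start, 3, "demo-service")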
-
isBindCollision
Return whether the exception is caused by an address-port collision when binding.- Parameters:
exception- (undocumented)- Returns:
- (undocumented)
-
setLogLevel
public static void setLogLevel(org.apache.logging.log4j.Level l) configure a new log4j level- Parameters:
l- (undocumented)
-
setLogLevelIfNeeded
-
getLogLevel
Get current log level- Returns:
- (undocumented)
-
libraryPathEnvName
Return the current system LD_LIBRARY_PATH name- Returns:
- (undocumented)
-
libraryPathEnvPrefix
Return the prefix of a command that appends the given library paths to the system-specific library path environment variable. On Unix, for instance, this returns the string LD_LIBRARY_PATH="path1:path2:$LD_LIBRARY_PATH".- Parameters:
libraryPaths- (undocumented)- Returns:
- (undocumented)
-
getSparkOrYarnConfig
Return the value of a config either through the SparkConf or the Hadoop configuration. The key is checked in the SparkConf before looking at any Hadoop configuration. If the key is set in the SparkConf, its value is returned regardless of whether the application is running on YARN. Only when the key is not set in the SparkConf and the application is running on YARN is the value taken from the Hadoop configuration.- Parameters:
conf- (undocumented)key- (undocumented)default_- (undocumented)- Returns:
- (undocumented)
-
extractHostPortFromSparkUrl
public static scala.Tuple2<String,Object> extractHostPortFromSparkUrl(String sparkUrl) throws SparkException Return a pair of host and port extracted from the sparkUrl.
A Spark URL (spark://host:port) is a special URI whose scheme is spark and which contains only a host and a port.- Parameters:
sparkUrl- (undocumented)- Returns:
- (undocumented)
- Throws:
SparkException- if sparkUrl is invalid.
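For example, in Scala (the host and port are illustrative):

  import org.apache.spark.util.Utils

  // A well-formed standalone master URL yields the host and port as a pair.
  val (host, port) = Utils.extractHostPortFromSparkUrl("spark://master.example.com:7077")
  // host == "master.example.com", port == 7077
  // An invalid URL throws SparkException.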
-
getCurrentUserName
Returns the current user name. This is the currently logged in user, unless that's been overridden by the SPARK_USER environment variable.- Returns:
- (undocumented)
-
EMPTY_USER_GROUPS
-
getCurrentUserGroups
-
parseStandaloneMasterUrls
Split the comma delimited string of master URLs into a list. For instance, "spark://abc,def" becomes [spark://abc, spark://def].- Parameters:
masterUrls- (undocumented)- Returns:
- (undocumented)
-
BACKUP_STANDALONE_MASTER_PREFIX
An identifier that backup masters use in their responses. -
responseFromBackup
Return true if the response message is sent from a backup Master on standby. -
withDummyCallSite
To avoid calling Utils.getCallSite for every single RDD we create in the body, set a dummy call site that RDDs use instead. This is a performance optimization.- Parameters:
sc- (undocumented)body- (undocumented)- Returns:
- (undocumented)
-
isInDirectory
Return whether the specified file is a parent directory of the child file.- Parameters:
parent- (undocumented)child- (undocumented)- Returns:
- (undocumented)
-
isLocalMaster
- Parameters:
conf- (undocumented)- Returns:
- whether it is local mode
-
isPushBasedShuffleEnabled
public static boolean isPushBasedShuffleEnabled(SparkConf conf, boolean isDriver, boolean checkSerializer) Push-based shuffle can only be enabled when the conditions below are met:
- the application is submitted to run in YARN mode
- the external shuffle service is enabled
- IO encryption is disabled
- the serializer (such as KryoSerializer) supports relocation of serialized objects
- Parameters:
conf- (undocumented)isDriver- (undocumented)checkSerializer- (undocumented)- Returns:
- (undocumented)
-
instantiateSerializerOrShuffleManager
-
instantiateSerializerFromConf
-
isDynamicAllocationEnabled
Return whether dynamic allocation is enabled in the given conf.- Parameters:
conf- (undocumented)- Returns:
- (undocumented)
-
isStreamingDynamicAllocationEnabled
-
getDynamicAllocationInitialExecutors
Return the initial number of executors for dynamic allocation.- Parameters:
conf- (undocumented)- Returns:
- (undocumented)
-
tempFileWith
Returns the path of a temporary file in the same directory as path.- Parameters:
path- (undocumented)- Returns:
- (undocumented)
-
getProcessName
Returns the name of this JVM process. This is OS dependent but typically (OSX, Linux, Windows), this is formatted as PID@hostname.- Returns:
- (undocumented)
-
initDaemon
public static void initDaemon(org.slf4j.Logger log) Utility function that should be called early in main() for daemons to set up some common diagnostic state.- Parameters:
log- (undocumented)
-
resetStructuredLogging
public static void resetStructuredLogging() Utility function to enable or disable structured logging based on system properties. This is designed for code paths where SparkConf cannot be used yet, and should be called before the first invocation of Logging.log(). For example, this should be used before initDaemon. -
resetStructuredLogging
Utility function to enable or disable structured logging based on SparkConf. This is designed for code paths where the logging system may be initialized before SparkConf is loaded.- Parameters:
sparkConf- (undocumented)
-
getUserJars
Return the jar files pointed to by the "spark.jars" property. Spark internally distributes these jars through the file server. In YARN mode, this returns an empty list, since YARN has its own mechanism to distribute jars.- Parameters:
conf- (undocumented)- Returns:
- (undocumented)
-
getLocalUserJarsForShell
Return the local jar files which will be added to the REPL's classpath. These jar files are specified by --jars (spark.jars) or --packages; remote jars are first downloaded locally by SparkSubmit.- Parameters:
conf- (undocumented)- Returns:
- (undocumented)
-
redact
public static scala.collection.Seq<scala.Tuple2<String,String>> redact(SparkConf conf, scala.collection.Seq<scala.Tuple2<String, String>> kvs) Redact the sensitive values in the given map. If a map key matches the redaction pattern then its value is replaced with a dummy text.- Parameters:
conf- (undocumented)kvs- (undocumented)- Returns:
- (undocumented)
-
redact
public static <K,V> scala.collection.Seq<scala.Tuple2<K,V>> redact(scala.Option<scala.util.matching.Regex> regex, scala.collection.Seq<scala.Tuple2<K, V>> kvs) Redact the sensitive values in the given map. If a map key matches the redaction pattern then its value is replaced with a dummy text.- Parameters:
regex- (undocumented)kvs- (undocumented)- Returns:
- (undocumented)
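A Scala sketch of the regex-based overload (the pattern, the keys and the exact replacement text are illustrative assumptions):

  import scala.util.matching.Regex
  import org.apache.spark.util.Utils

  // Keys matching the pattern have their values replaced with a dummy text.
  val pattern: Option[Regex] = Some("(?i)secret|password".r)
  val kvs = Seq("spark.app.name" -> "demo", "my.service.password" -> "hunter2")

  val cleaned = Utils.redact(pattern, kvs)
  // e.g. Seq(("spark.app.name", "demo"), ("my.service.password", "<redacted dummy text>"))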
-
redact
Redact the sensitive information in the given string.- Parameters:
regex- (undocumented)text- (undocumented)- Returns:
- (undocumented)
-
redact
public static scala.collection.Seq<scala.Tuple2<String,String>> redact(scala.collection.Map<String, String> kvs) Looks up the redaction regex from within the key value pairs and uses it to redact the rest of the key value pairs. No care is taken to make sure the redaction property itself is not redacted. So theoretically, the property itself could be configured to redact its own value when printing.- Parameters:
kvs- (undocumented)- Returns:
- (undocumented)
-
redactCommandLineArgs
-
loadExtensions
public static <T> scala.collection.immutable.Seq<T> loadExtensions(Class<T> extClass, scala.collection.immutable.Seq<String> classes, SparkConf conf) Create instances of extension classes. The classes in the given list must:
- Be sub-classes of the given base class.
- Provide either a no-arg constructor, or a 1-arg constructor that takes a SparkConf.
The constructors are allowed to throw "UnsupportedOperationException" if the extension does not want to be registered; this allows the implementations to check the Spark configuration (or other state) and decide they do not need to be added. A log message is printed in that case. Other exceptions are bubbled up.
- Parameters:
extClass- (undocumented)classes- (undocumented)conf- (undocumented)- Returns:
- (undocumented)
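A Scala sketch with a hypothetical extension point (MyListener and LoggingListener are not Spark classes; they only illustrate the constructor contract described above):

  import org.apache.spark.SparkConf
  import org.apache.spark.util.Utils

  trait MyListener { def onEvent(name: String): Unit }

  // The 1-arg constructor receives the SparkConf; a no-arg constructor would also be accepted.
  class LoggingListener(conf: SparkConf) extends MyListener {
    override def onEvent(name: String): Unit = println(s"event: $name")
  }

  val conf = new SparkConf()
  val listeners: Seq[MyListener] =
    Utils.loadExtensions(classOf[MyListener], Seq(classOf[LoggingListener].getName), conf)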
-
checkAndGetK8sMasterUrl
Check the validity of the given Kubernetes master URL and return the resolved URL. Prefix "k8s://" is appended to the resolved URL as the prefix is used by KubernetesClusterManager in canCreate to determine if the KubernetesClusterManager should be used.- Parameters:
rawMasterURL- (undocumented)- Returns:
- (undocumented)
-
substituteAppNExecIds
Replaces all the {{EXECUTOR_ID}} occurrences with the Executor Id and {{APP_ID}} occurrences with the App Id.- Parameters:
opt- (undocumented)appId- (undocumented)execId- (undocumented)- Returns:
- (undocumented)
-
substituteAppId
Replaces all the {{APP_ID}} occurrences with the App Id.- Parameters:
opt- (undocumented)appId- (undocumented)- Returns:
- (undocumented)
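For example, in Scala (the option string and IDs are illustrative):

  import org.apache.spark.util.Utils

  val opt = "-Dlog.dir=/logs/{{APP_ID}}/{{EXECUTOR_ID}}"

  Utils.substituteAppNExecIds(opt, "app-20240101000000-0001", "3")
  // "-Dlog.dir=/logs/app-20240101000000-0001/3"

  Utils.substituteAppId("-Dapp={{APP_ID}}", "app-20240101000000-0001")
  // "-Dapp=app-20240101000000-0001"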
-
createSecret
-
stringHalfWidth
Return the number of half widths in a given string. Note that a full width character occupies two half widths. For a string consisting of 1 million characters, the execution of this method requires about 50ms.
- Parameters:
str- (undocumented)- Returns:
- (undocumented)
-
sanitizeDirName
-
nameForAppAndAttempt
-
isClientMode
-
isLocalUri
Returns whether the URI is a "local:" URI. -
getUriBuilder
Create a UriBuilder from URI object. -
getUriBuilder
Create a UriBuilder from URI string. -
isFileSplittable
public static boolean isFileSplittable(org.apache.hadoop.fs.Path path, org.apache.hadoop.io.compress.CompressionCodecFactory codecFactory) Check whether the file of the path is splittable. -
cloneProperties
Create a new properties object with the same values as `props` -
buildLocationMetadata
public static String buildLocationMetadata(scala.collection.immutable.Seq<org.apache.hadoop.fs.Path> paths, int stopAppendingThreshold) Convert a sequence of Paths to a metadata string. When the length of the metadata string exceeds stopAppendingThreshold, stop appending paths, to save memory.- Parameters:
paths- (undocumented)stopAppendingThreshold- (undocumented)- Returns:
- (undocumented)
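A Scala sketch (the paths and threshold are illustrative; with a small threshold, later paths are dropped to bound the size of the resulting string):

  import org.apache.hadoop.fs.Path
  import org.apache.spark.util.Utils

  val paths = Seq(
    new Path("/data/part-0000"),
    new Path("/data/part-0001"),
    new Path("/data/part-0002"))

  // Stops appending once the accumulated metadata string exceeds the threshold.
  val meta = Utils.buildLocationMetadata(paths, 20)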
-
executorOffHeapMemorySizeAsMb
Convert MEMORY_OFFHEAP_SIZE to MB Unit, return 0 if MEMORY_OFFHEAP_ENABLED is false.- Parameters:
sparkConf- (undocumented)- Returns:
- (undocumented)
-
checkOffHeapEnabled
return 0 if MEMORY_OFFHEAP_ENABLED is false.- Parameters:
sparkConf- (undocumented)offHeapSize- (undocumented)- Returns:
- (undocumented)
-
createFailedToGetTokenMessage
public static org.apache.spark.internal.MessageWithContext createFailedToGetTokenMessage(String serviceName, Throwable e) Returns a string message about delegation token generation failure -
unzipFilesFromFile
public static scala.collection.immutable.Seq<File> unzipFilesFromFile(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path dfsZipFile, File localDir) Decompress a zip file into a local dir. File names are read from the zip file. Note, we skip addressing the directory here. Also, we rely on the caller side to address any exceptions.- Parameters:
fs- (undocumented)dfsZipFile- (undocumented)localDir- (undocumented)- Returns:
- (undocumented)
-
createResourceUninterruptiblyIfInTaskThread
public static <R extends Closeable> R createResourceUninterruptiblyIfInTaskThread(scala.Function0<R> createResource) Create a resource uninterruptibly if we are in a task thread (i.e., TaskContext.get() != null). Otherwise, create the resource normally. This is mainly used in the situation where we want to create a multi-layer resource in a task thread. The uninterruptible behavior ensures we don't leak the underlying resources when there is a task cancellation request.- Parameters:
createResource- (undocumented)- Returns:
- (undocumented)
-
median
public static long median(long[] sizes, boolean alreadySorted) Return the median value of a long array.- Parameters:
sizes-alreadySorted-- Returns:
-
checkCommandAvailable
Check if a command is available.- Parameters:
command- (undocumented)- Returns:
- (undocumented)
-
isG1GC
public static boolean isG1GC() -
org$apache$spark$internal$Logging$$log_
public static org.slf4j.Logger org$apache$spark$internal$Logging$$log_() -
org$apache$spark$internal$Logging$$log__$eq
public static void org$apache$spark$internal$Logging$$log__$eq(org.slf4j.Logger x$1) -
LogStringContext
public static org.apache.spark.internal.Logging.LogStringContext LogStringContext(scala.StringContext sc) -
getSparkClassLoader
-
getContextOrSparkClassLoader
-
classForName
-
classForName$default$2
public static <C> boolean classForName$default$2() -
classForName$default$3
public static <C> boolean classForName$default$3() -
classIsLoadable
-
classIsLoadableAndAssignableFrom
-
getFormattedClassName
-
getSimpleName
-
stripDollars
-
isTesting
public static boolean isTesting() -
tryOrIOException
public static <T> T tryOrIOException(scala.Function0<T> block) -
tryWithResource
public static <R extends Closeable,T> T tryWithResource(scala.Function0<R> createResource, scala.Function1<R, T> f) -
tryInitializeResource
public static <R extends Closeable,T> T tryInitializeResource(scala.Function0<R> createResource, scala.Function1<R, T> initialize) -
tryWithSafeFinally
public static <T> T tryWithSafeFinally(scala.Function0<T> block, scala.Function0<scala.runtime.BoxedUnit> finallyBlock) -
stackTraceToString
-
resolveURI
-
recursiveList
-
createDirectory
-
createDirectory
-
createDirectory$default$2
-
createTempDir$default$1
-
createTempDir$default$2
-
serialize
public static <T> byte[] serialize(T o) -
deserialize
public static <T> T deserialize(byte[] bytes) -
deserialize
-
copyStream
public static long copyStream(InputStream in, OutputStream out, boolean closeStreams, boolean transferToEnabled) -
copyStream$default$3
public static boolean copyStream$default$3() -
copyStream$default$4
public static boolean copyStream$default$4() -
copyFileStreamNIO
public static void copyFileStreamNIO(FileChannel input, WritableByteChannel output, long startPosition, long bytesToCopy) -
stringToSeq
-