How to get table names from SQL query?

Hope it will help you

Parse the given query using spark sql parser (spark internally does same). You can get sqlParser from session's state. It will give Logical plan of query. Iterate over logical plan of query & check whether it is instance of UnresolvedRelation (leaf logical operator to represent a table reference in a logical query plan that has yet to be resolved) & get table from it.

def getTables(query: String) : Seq[String] ={
    val logical : LogicalPlan = localsparkSession.sessionState.sqlParser.parsePlan(query)
    val tables = scala.collection.mutable.LinkedHashSet.empty[String]
    var i = 0
    while (true) {
      if (logical(i) == null) {
        return tables.toSeq
      } else if (logical(i).isInstanceOf[UnresolvedRelation]) {
        val tableIdentifier = logical(i).asInstanceOf[UnresolvedRelation].tableIdentifier
        tables += tableIdentifier.unquotedString.toLowerCase
      }
      i = i + 1
    }
    tables.toSeq
}

Thanks a lot @Swapnil Chougule for the answer. That inspired me to offer an idiomatic way of collecting all the tables in a structured query.

scala> spark.version
res0: String = 2.3.1

def getTables(query: String): Seq[String] = {
  val logicalPlan = spark.sessionState.sqlParser.parsePlan(query)
  import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
  logicalPlan.collect { case r: UnresolvedRelation => r.tableName }
}

val query = "select * from table_1 as a left join table_2 as b on a.id=b.id"
scala> getTables(query).foreach(println)
table_1
table_2