# nutchsolrclean.rb source

include Java
import org.apache.commons.logging.LogFactory
import org.apache.hadoop.hbase.util.VersionInfo
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.fs.Path
import org.apache.hadoop.hbase.HConstants
import org.apache.hadoop.hbase.util.FSUtils
import org.apache.hadoop.hbase.client.HTable
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.util.Writables
import org.apache.hadoop.hbase.HRegionInfo
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HTableDescriptor
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.filter.CompareFilter
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter
import org.apache.hadoop.hbase.filter.SubstringComparator
import org.apache.hadoop.hbase.util.Bytes

# Name of this script; interpolated into the usage banner and used as the logger name.
NAME = 'check_meta'

# Write the usage banner for this script to standard output, then terminate
# the process immediately via exit! (so this method never returns).
def usage
  banner = [
    format('Usage: %s.rb [--fix]', NAME),
    ' fix   Try to fixup meta issues',
    'Script checks consistency of the .META. table.  It reports if .META. has missing entries.',
    'If you pass "--fix", it will try looking in the filesystem for the dropped region and if it',
    'finds a likely candidate, it will try pluggin the .META. hole.'
  ]
  # puts prints each array element on its own line.
  puts banner
  exit!
end

# Decide from ARGV whether this run should attempt repairs.
#
# Accepts either no arguments or exactly one argument that starts with
# "--fix" (compared case-insensitively, so "--FIX" and "--fixup" are fine).
# Any other command line calls usage, which prints the banner and exits.
#
# Returns 1 when fix-up was requested, nil otherwise.
def isFixup
  usage if ARGV.size > 1
  return nil unless ARGV.size == 1
  # Anchor at the start of the argument: an unanchored pattern would also
  # accept strings that merely contain "--fix", such as "junk--fix".
  usage unless ARGV[0].downcase =~ /\A--fix/
  1
end

# Build the HBase configuration object used by this script.
#
# When VersionInfo reports a version matching 0.20.x the configuration is
# constructed directly; otherwise the HBaseConfiguration.create factory is
# used. Afterwards both the old ("fs.default.name") and the new
# ("fs.defaultFS") filesystem keys are set to the value of hbase.rootdir, so
# the filesystem follows hbase.rootdir rather than always using localhost.
#
# Returns the configuration object.
def getConfiguration
  legacy = VersionInfo.getVersion().match('0\.20\..*')
  config = legacy ? HBaseConfiguration.new() : HBaseConfiguration.create()
  # Old key first, then the new one.
  %w[fs.default.name fs.defaultFS].each do |fs_key|
    config.set(fs_key, config.get(HConstants::HBASE_DIR))
  end
  config
end

# Try to plug a hole in .META. that begins where leftEdge ends.
#
# Lists the directories under leftEdge's table directory, reads the
# .regioninfo file of each, and for every region whose start key equals
# leftEdge's end key writes that region's info into metatable.
#
# leftEdge  - region on the left side of the hole; its table descriptor name
#             and end key are read.
# rightEdge - region on the right side of the hole; currently unused (see the
#             TODOs below).
# metatable - table that receives the Put for each matching region.
# fs        - filesystem used to list the table directory and open files.
# rootdir   - root directory passed to HTableDescriptor.getTableDir.
#
# Returns true if at least one region was written, nil otherwise. Errors
# raised while reading a .regioninfo file propagate to the caller.
def fixup(leftEdge, rightEdge, metatable, fs, rootdir)
  plugged = nil
  tabledir = HTableDescriptor.getTableDir(rootdir, leftEdge.getTableDesc().getName())
  # listStatus may return nil rather than an empty listing (older Hadoop
  # releases do so for a missing directory); treat that as nothing to scan.
  (fs.listStatus(tabledir) || []).each do |status|
    next unless status.isDir()
    next if status.getPath().getName() == "compaction.dir"
    regioninfofile = Path.new(status.getPath(), ".regioninfo")
    unless fs.exists(regioninfofile)
      LOG.warn("Missing .regioninfo: " + regioninfofile.toString())
      next
    end
    hri = HRegionInfo.new()
    is = fs.open(regioninfofile)
    begin
      hri.readFields(is)
    ensure
      # Close the stream even when readFields raises, so it is not leaked.
      is.close()
    end
    next unless Bytes.equals(leftEdge.getEndKey(), hri.getStartKey())
    # TODO: Check against right edge to make sure this addition does not overflow right edge.
    # TODO: Check that the schema matches both left and right edges schemas.
    put = Put.new(hri.getRegionName())
    put.add(HConstants::CATALOG_FAMILY, HConstants::REGIONINFO_QUALIFIER, Writables.getBytes(hri))
    metatable.put(put)
    LOG.info("Plugged hole in .META. at: " + hri.toString())
    plugged = true
  end
  plugged
end

# Row keys are quoted with Shellwords before being handed to the shell.
require 'shellwords'

# Validate the command line; isFixup prints usage and exits on bad arguments.
# The returned flag is not referenced again in this section.
fixup = isFixup()

# Get configuration
conf = getConfiguration()

# Filesystem
fs = FileSystem.get(conf)

# Rootdir
rootdir = FSUtils.getRootDir(conf)

# Get a logger. The fixup method in this file logs through this constant too.
LOG = LogFactory.getLog(NAME)

# Table that is scanned and whose matching rows are passed to deleteall.sh.
table_name = 'test9_webpage'

# Scan the table for rows whose f:st column equals the 4 bytes 00 00 00 01.
# NOTE(review): per the HBase documentation SingleColumnValueFilter also emits
# rows that lack the column unless setFilterIfMissing(true) is called; confirm
# whether such rows are meant to be deleted as well.
metatable = HTable.new(conf, table_name)
scan = Scan.new()
filter1 = SingleColumnValueFilter.new(
  Bytes.toBytes('f'),
  Bytes.toBytes('st'),
  CompareFilter::CompareOp.valueOf('EQUAL'),
  Bytes.toBytes("\x00\x00\x00\x01")
)
scan.setFilter(filter1)

scanner = metatable.getScanner(scan)
failures = 0
begin
  while (result = scanner.next())
    rowid = Bytes.toString(result.getRow()).to_s
    LOG.info("Testing rowid str " + rowid)
    # The row key comes from table data and may contain shell metacharacters,
    # so it is escaped before being placed on the command line.
    # Presumably deleteall.sh emits a delete command for the hbase shell - verify.
    command = "./deleteall.sh #{table_name} #{Shellwords.escape(rowid)} | ./hbase shell"
    LOG.info(command)
    # system returns true only when the pipeline exits with status 0.
    unless system(command)
      failures += 1
      LOG.warn("Delete command failed for rowid " + rowid)
    end
  end
ensure
  # Release the scanner even if the loop raised.
  scanner.close()
end
# Exit 0 if every delete command succeeded, else 1.
exit(failures.zero? ? 0 : 1)
# Comments