# nutchsolrclean.rb source
include Java
import org.apache.commons.logging.LogFactory
import org.apache.hadoop.hbase.util.VersionInfo
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.fs.Path
import org.apache.hadoop.hbase.HConstants
import org.apache.hadoop.hbase.util.FSUtils
import org.apache.hadoop.hbase.client.HTable
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.util.Writables
import org.apache.hadoop.hbase.HRegionInfo
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HTableDescriptor
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.filter.CompareFilter
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter
import org.apache.hadoop.hbase.filter.SubstringComparator
import org.apache.hadoop.hbase.util.Bytes
# Script name; substituted into the first line of the usage banner.
NAME = 'check_meta'

# Writes the command-line help to stdout, one banner line per output line,
# and then terminates the process immediately via exit!. This method never
# returns to its caller.
def usage
  banner = [
    format('Usage: %s.rb [--fix]', NAME),
    ' fix Try to fixup meta issues',
    'Script checks consistency of the .META. table. It reports if .META. has missing entries.',
    'If you pass "--fix", it will try looking in the filesystem for the dropped region and if it',
    'finds a likely candidate, it will try pluggin the .META. hole.'
  ]
  banner.each { |line| puts line }
  exit!
end
# Decides from ARGV whether this run was asked to attempt repairs.
#
# Accepted invocations are: no arguments at all, or exactly one argument
# that starts with "--fix" (compared case-insensitively, so "--FIX" and
# "--fixup" both qualify). Any other invocation calls `usage`, which prints
# the help text and terminates the process.
#
# Returns 1 when the fix flag was supplied, nil otherwise.
def isFixup
  usage if ARGV.size > 1
  return nil if ARGV.empty?

  # Anchor the check at the start of the argument. The previous unanchored
  # pattern accepted any argument that merely contained "--fix" somewhere
  # (for example "bogus--fix").
  usage unless ARGV[0].downcase =~ /\A--fix/
  1
end
# Builds the HBase configuration object used by the rest of the script.
#
# When VersionInfo reports a version matching 0.20.x the configuration is
# constructed with HBaseConfiguration.new; otherwise the
# HBaseConfiguration.create factory is used.
#
# Both spellings of the Hadoop default-filesystem key (old and new) are then
# pointed at the configured value of HConstants::HBASE_DIR, so that the
# filesystem follows hbase.rootdir instead of always resolving to localhost.
#
# Returns the configuration object.
def getConfiguration
  legacy_release = VersionInfo.getVersion.match('0\.20\..*')
  conf = legacy_release ? HBaseConfiguration.new : HBaseConfiguration.create
  ['fs.default.name', 'fs.defaultFS'].each do |fs_key|
    conf.set(fs_key, conf.get(HConstants::HBASE_DIR))
  end
  conf
end
# Tries to plug a hole in .META. that follows the region +leftEdge+.
#
# Every sub-directory of the table directory (except "compaction.dir") is
# inspected: its ".regioninfo" file is deserialized into an HRegionInfo and,
# when that region's start key equals leftEdge's end key, a Put carrying the
# serialized region info is written to +metatable+.
#
# leftEdge  - region info on the left side of the hole; supplies the table
#             name and the end key that a candidate must start at.
# rightEdge - region info on the right side of the hole. Currently unused
#             (see the TODOs in the body).
# metatable - table object that receives the Put for each matching region.
# fs        - filesystem handle used to list directories and read files.
# rootdir   - HBase root directory; the table directory is derived from it.
#
# Returns true if at least one region was written to metatable, nil
# otherwise. Exceptions raised while reading a .regioninfo file propagate to
# the caller, but the file's stream is closed first.
def fixup(leftEdge, rightEdge, metatable, fs, rootdir)
  plugged = nil
  tabledir = HTableDescriptor::getTableDir(rootdir, leftEdge.getTableDesc().getName())
  # listStatus can hand back nil (Java null), e.g. when the table directory
  # does not exist; treat that as "nothing to inspect" instead of crashing.
  statuses = fs.listStatus(tabledir) || []
  statuses.each do |status|
    next unless status.isDir()
    next if status.getPath().getName() == "compaction.dir"

    regioninfofile = Path.new(status.getPath(), ".regioninfo")
    unless fs.exists(regioninfofile)
      LOG.warn("Missing .regioninfo: " + regioninfofile.toString())
      next
    end

    hri = HRegionInfo.new()
    stream = fs.open(regioninfofile)
    begin
      hri.readFields(stream)
    ensure
      # Always release the stream, even if the file is corrupt and
      # readFields raises.
      stream.close()
    end

    # Only a region that begins exactly where the left edge ends can fill
    # the hole.
    next unless Bytes.equals(leftEdge.getEndKey(), hri.getStartKey())

    # TODO: Check against right edge to make sure this addition does not overflow right edge.
    # TODO: Check that the schema matches both left and right edges schemas.
    put = Put.new(hri.getRegionName())
    put.add(HConstants::CATALOG_FAMILY, HConstants::REGIONINFO_QUALIFIER, Writables.getBytes(hri))
    metatable.put(put)
    LOG.info("Plugged hole in .META. at: " + hri.toString())
    plugged = true
  end
  plugged
end
# Main script body: scans one HBase table for rows whose f:st cell matches a
# fixed value and, for every such row, pipes the output of ./deleteall.sh
# into ./hbase shell. Exits 0 when every command succeeded, 1 otherwise.
require 'shellwords'

# Validate the command-line arguments; prints usage and exits on bad input.
# NOTE(review): the flag it returns is not consulted anywhere below.
isFixup()

# Get configuration
conf = getConfiguration()
# Filesystem and root directory.
# NOTE(review): neither handle is used by the scan below; the calls are kept
# so the script still touches the filesystem exactly as before.
fs = FileSystem.get(conf)
rootdir = FSUtils.getRootDir(conf)
# Logger shared with the helper methods defined above.
LOG = LogFactory.getLog(NAME)

# Table that is both scanned and passed to the delete script.
table_name = 'test9_webpage'
table = HTable.new(conf, table_name)

# Select rows whose f:st cell equals the four bytes 00 00 00 01.
# NOTE(review): presumably a big-endian int status of 1 -- confirm against
# the table schema. Also confirm whether rows lacking f:st should be
# skipped; SingleColumnValueFilter lets them through unless
# setFilterIfMissing(true) is called.
scan = Scan.new()
status_filter = SingleColumnValueFilter.new(
  Bytes.toBytes('f'),
  Bytes.toBytes('st'),
  CompareFilter::CompareOp.valueOf('EQUAL'),
  Bytes.toBytes("\x00\x00\x00\x01")
)
scan.setFilter(status_filter)

scanner = table.getScanner(scan)
failures = 0
begin
  while (result = scanner.next())
    rowid = Bytes.toString(result.getRow())
    LOG.info("Testing rowid str " + rowid)
    # Row keys come from table data and may contain spaces or shell
    # metacharacters, so escape the key before placing it on a shell
    # command line.
    command = "./deleteall.sh #{table_name} #{Shellwords.escape(rowid)} | ./hbase shell"
    LOG.info(command)
    # system returns true only when the pipeline exits with status 0.
    unless system(command)
      failures += 1
      LOG.warn("Delete command failed for row: " + rowid)
    end
  end
ensure
  # Release the scanner even if the scan or a command raises.
  scanner.close()
end

# Exit 0 if every delete command succeeded, else non-zero.
exit(failures.zero? ? 0 : 1)