# nutchsolrclean.rb source

include Java

import org.apache.commons.logging.LogFactory

import org.apache.hadoop.hbase.util.VersionInfo

import org.apache.hadoop.hbase.HBaseConfiguration

import org.apache.hadoop.fs.FileSystem

import org.apache.hadoop.fs.Path

import org.apache.hadoop.hbase.HConstants

import org.apache.hadoop.hbase.util.FSUtils

import org.apache.hadoop.hbase.client.HTable

import org.apache.hadoop.hbase.client.Scan

import org.apache.hadoop.hbase.util.Writables

import org.apache.hadoop.hbase.HRegionInfo

import org.apache.hadoop.hbase.util.Bytes

import org.apache.hadoop.hbase.HTableDescriptor

import org.apache.hadoop.hbase.client.Put

import org.apache.hadoop.hbase.filter.CompareFilter

import org.apache.hadoop.hbase.filter.SingleColumnValueFilter

import org.apache.hadoop.hbase.filter.SubstringComparator

import org.apache.hadoop.hbase.util.Bytes

# Name of this script; used in the usage banner and as the logger name.
# Frozen so the shared constant cannot be mutated in place.
# NOTE(review): the name and the usage text describe a .META. checker, while
# the script body scans and cleans 'test9_webpage' -- confirm the intended name.
NAME = 'check_meta'.freeze

# Print the usage banner for this script on standard output, then terminate
# the process immediately via exit! (never returns to the caller).
def usage
  puts(
    'Usage: %s.rb [--fix]' % NAME,
    ' fix Try to fixup meta issues',
    'Script checks consistency of the .META. table. It reports if .META. has missing entries.',
    'If you pass "--fix", it will try looking in the filesystem for the dropped region and if it',
    'finds a likely candidate, it will try pluggin the .META. hole.'
  )
  exit!
end

# Decide from ARGV whether this run was asked to do fixup.
#
# Calls usage (which prints the banner and exits) when more than one argument
# is given, or when the single argument does not match '--fix.*'
# (compared case-insensitively).
#
# Returns 1 when the single argument matches, nil when no argument was given.
def isFixup
  usage if ARGV.size > 1
  return nil unless ARGV.size == 1

  usage unless ARGV[0].downcase.match('--fix.*')
  1
end

# Build the HBase configuration object used by this script.
#
# When the running HBase version string matches '0\.20\..*' the configuration
# is constructed with HBaseConfiguration.new; otherwise the
# HBaseConfiguration.create factory is used.
#
# Returns the configuration with both filesystem keys pointed at hbase.rootdir.
def getConfiguration
  conf = if VersionInfo.getVersion().match('0\.20\..*')
           HBaseConfiguration.new()
         else
           HBaseConfiguration.create()
         end

  # Point the hadoop filesystem at the hbase.rootdir value; otherwise we would
  # always use localhost even though hbase.rootdir might be pointing at an
  # hdfs location. Both the old and the new filesystem key are set.
  ["fs.default.name", "fs.defaultFS"].each do |fs_key|
    conf.set(fs_key, conf.get(HConstants::HBASE_DIR))
  end

  conf
end

# Try to plug a hole in .META. that follows +leftEdge+.
#
# Walks the region directories under the table directory of +leftEdge+, reads
# each directory's .regioninfo file, and for every region whose start key
# equals the end key of +leftEdge+ writes that region's info into +metatable+.
#
# leftEdge  - region info on the left side of the hole (supplies the table
#             name and the end key to match against).
# rightEdge - region info on the right side of the hole; currently unused
#             (see the TODOs below).
# metatable - table handle that receives the Put.
# fs        - filesystem used to list, test and open files.
# rootdir   - HBase root directory used to locate the table directory.
#
# Returns true when at least one region was written, nil otherwise.
# Errors raised while reading a .regioninfo file propagate to the caller,
# but the open stream is always closed first.
def fixup(leftEdge, rightEdge, metatable, fs, rootdir)
  plugged = nil
  tabledir = HTableDescriptor.getTableDir(rootdir, leftEdge.getTableDesc().getName())

  # listStatus can come back nil (some Hadoop versions return null for a
  # missing directory); treat that as "no candidate regions".
  statuses = fs.listStatus(tabledir) || []

  statuses.each do |status|
    next unless status.isDir()

    regiondir = status.getPath()
    next if regiondir.getName() == "compaction.dir"

    regioninfofile = Path.new(regiondir, ".regioninfo")
    unless fs.exists(regioninfofile)
      LOG.warn("Missing .regioninfo: " + regioninfofile.toString())
      next
    end

    hri = HRegionInfo.new()
    stream = fs.open(regioninfofile)
    begin
      hri.readFields(stream)
    ensure
      # Close even when readFields raises so the stream is not leaked.
      stream.close()
    end

    next unless Bytes.equals(leftEdge.getEndKey(), hri.getStartKey())

    # TODO: Check against right edge to make sure this addition does not overflow right edge.
    # TODO: Check that the schema matches both left and right edges schemas.
    put = Put.new(hri.getRegionName())
    put.add(HConstants::CATALOG_FAMILY, HConstants::REGIONINFO_QUALIFIER, Writables.getBytes(hri))
    metatable.put(put)
    LOG.info("Plugged hole in .META. at: " + hri.toString())
    plugged = true
  end

  plugged
end

# ---------------------------------------------------------------------------
# Script body: scan 'test9_webpage' for rows whose f:st column equals the
# filter value and delete each one by piping ./deleteall.sh into ./hbase shell.
# Exits 0 when every delete command succeeded, 1 otherwise.
# ---------------------------------------------------------------------------

# Validate the command line; isFixup prints usage and exits on bad arguments.
# NOTE(review): the returned flag is not consulted below -- matching rows are
# deleted whether or not --fix was passed. Confirm that is intended.
isFixup()

# Get configuration
conf = getConfiguration()

# Filesystem and root dir.
# NOTE(review): neither value is used by the scan below; the lookups are kept
# so a misconfigured filesystem still fails early, as before.
fs = FileSystem.get(conf)
rootdir = FSUtils.getRootDir(conf)

# Get a logger.
LOG = LogFactory.getLog(NAME)

# Table to clean; also passed to the delete command.
table_name = 'test9_webpage'
metatable = HTable.new(conf, table_name)

# Select rows whose f:st value equals the four bytes 00 00 00 01
# (presumably a big-endian int status of 1 -- confirm against the schema).
# NOTE(review): SingleColumnValueFilter emits rows that lack the column unless
# setFilterIfMissing(true) is called; confirm rows without f:st should be
# deleted too.
scan = Scan.new()
scan.setFilter(
  SingleColumnValueFilter.new(
    Bytes.toBytes('f'),
    Bytes.toBytes('st'),
    CompareFilter::CompareOp.valueOf('EQUAL'),
    Bytes.toBytes("\x00\x00\x00\x01")
  )
)

failures = 0
scanner = metatable.getScanner(scan)
begin
  while (result = scanner.next())
    rowid = Bytes.toString(result.getRow()).to_s
    LOG.info("Testing rowid str " + rowid)

    # Row keys come from stored data and may contain shell metacharacters
    # (&, ;, ?, spaces, quotes). Single-quote the key, escaping embedded
    # single quotes, so it reaches deleteall.sh as exactly one argument.
    quoted_rowid = "'" + rowid.gsub("'") { "'\\''" } + "'"
    command = "./deleteall.sh #{table_name} #{quoted_rowid} | ./hbase shell"
    LOG.info(command)

    # system returns true only when the pipeline exited with status 0.
    unless system(command)
      failures += 1
      LOG.warn("Delete command failed for rowid " + rowid)
    end
  end
ensure
  # Release the scanner even if a delete or the scan itself raised.
  scanner.close()
end

# Return 0 if every delete command succeeded, else non-zero.
exit(failures.zero? ? 0 : 1)