How do I import the hadoop 2.6.0 eclipse package into eclipse?

[hadoop] Importing the hadoop source into eclipse on Windows, compiling WordCount, and packaging a jar - 林羽飞扬 - 博客园

The hadoop version is hadoop 1.2.1.
The eclipse version is eclipse-standard-kepler-SR2-win32-x86_64.
WordCount.java is taken from hadoop-1.2.1\src\examples\org\apache\hadoop\examples\WordCount.java:
/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.examples;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

  // Emits (word, 1) for every token in the input line.
  public static class TokenizerMapper
       extends Mapper<Object, Text, Text, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(Object key, Text value, Context context
                    ) throws IOException, InterruptedException {
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, one);
      }
    }
  }

  // Sums the counts for each word; also used as the combiner.
  public static class IntSumReducer
       extends Reducer<Text, IntWritable, Text, IntWritable> {

    private IntWritable result = new IntWritable();

    public void reduce(Text key, Iterable<IntWritable> values,
                       Context context
                       ) throws IOException, InterruptedException {
      int sum = 0;
      for (IntWritable val : values) {
        sum += val.get();
      }
      result.set(sum);
      context.write(key, result);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: wordcount <in> <out>");
      System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
In eclipse, create a new java project named WordCount.
In the project, create a new class, also named WordCount.
Overwrite the generated WordCount.java in eclipse with the code above,
and change the package at the top of the file to wordcount. After the change the source begins:
package wordcount;

(The imports and the rest of the class are otherwise identical to the listing above; only the package declaration changes.)
The hadoop-specific imports in the source are:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
As these imports show, the source pulls in a number of hadoop classes that are not part of the JDK, so the corresponding dependency jars must be added to the eclipse project; otherwise the compiler has no way to locate these classes, and it must be told explicitly where they live.
Compiling and running at this point produces the following error:
Exception in thread "main" java.lang.Error: Unresolved compilation problems:
    The import org.apache.commons cannot be resolved
    The import org.apache.commons cannot be resolved
    The import org.codehaus cannot be resolved
    The import org.codehaus cannot be resolved
    Log cannot be resolved to a type
    LogFactory cannot be resolved
    Log cannot be resolved to a type
    (the "Log cannot be resolved to a type" line repeats about twenty times, once per use of Log)
    JsonFactory cannot be resolved to a type
    JsonFactory cannot be resolved to a type
    JsonGenerator cannot be resolved to a type

    at org.apache.hadoop.conf.Configuration.<init>(Configuration.java:60)
    at wordcount.WordCount.main(WordCount.java:52)
The cause is missing dependency jar files; adding the missing jars to the build path fixes it.
Use Add External JARs to add all of the jar files under hadoop-1.2.1\lib.
Compile and run again: success.
Finally, package it as a jar file:
File -> Export
The jar name is not required to match the class name; WordCount.jar could just as well be CountWord.jar, it makes little difference. Then click Finish.
After that it can be run on hadoop (a detailed walkthrough of running WordCount is in the post linked from the original page):

hadoop jar WordCount.jar WordCount input output
Note that the code above does not declare

package org.apache.hadoop.examples;

If that package were used, the jar would contain a package hierarchy, and hadoop jar WordCount.jar WordCount input output would no longer work as-is; the main class would have to be fully qualified, so the command becomes

hadoop jar WordCount.jar org.apache.hadoop.examples.WordCount input output

(The same applies to the package wordcount; version above, whose main class is wordcount.WordCount.)
The packaging step itself is covered in more detail in the post linked from the original page.

Building and packaging the eclipse hadoop plugin - 老田的博客 - ITeye技术网站
The latest hadoop release, 1.0.1, does not ship with an eclipse plugin, so we need to build hadoop-eclipse-plugin-1.0.1.jar ourselves.
The plugin source sits in the eclipse-plugin folder under hadoop-1.0.1\src\contrib. Packaging uses ant, so install ant beforehand. Since we only want to package eclipse-plugin, while its build.xml assumes the whole of hadoop has already been compiled and packaged, a few files have to be modified.
First, add eclipse.home to build-contrib.xml under hadoop-1.0.1\src\contrib:
<property name="eclipse.home" location="E:/MyHadoopWorkspace/eclipse" />
Next, modify build.xml under eclipse-plugin, adding the hadoop-core and common lib dependencies to the classpath:
<path id="classpath">
  <pathelement location="${build.classes}"/>
  <pathelement location="${hadoop.root}/build/classes"/>
  <!-- hadoop-core-1.0.1.jar dependency -->
  <pathelement location="${hadoop.root}"/>
  <!-- common lib dependency -->
  <pathelement location="${hadoop.root}/lib"/>
  <path refid="eclipse-sdk-jars"/>
</path>
Next, change which jars get bundled into the plugin and the output version name, as follows:
<target name="jar" depends="compile" unless="skip.contrib">
  <mkdir dir="${build.dir}/lib"/>
  <!-- bundle the following jars into hadoop-eclipse-plugin-1.0.1.jar -->
  <copy file="${hadoop.root}/hadoop-core-1.0.1.jar" tofile="${build.dir}/lib/hadoop-core.jar" verbose="true"/>
  <copy file="${hadoop.root}/lib/commons-cli-1.2.jar" todir="${build.dir}/lib" verbose="true"/>
  <copy file="${hadoop.root}/lib/commons-lang-2.4.jar" todir="${build.dir}/lib" verbose="true"/>
  <copy file="${hadoop.root}/lib/commons-configuration-1.6.jar" todir="${build.dir}/lib" verbose="true"/>
  <copy file="${hadoop.root}/lib/jackson-mapper-asl-1.8.8.jar" todir="${build.dir}/lib" verbose="true"/>
  <copy file="${hadoop.root}/lib/jackson-core-asl-1.8.8.jar" todir="${build.dir}/lib" verbose="true"/>
  <copy file="${hadoop.root}/lib/commons-httpclient-3.0.1.jar" todir="${build.dir}/lib" verbose="true"/>
  <jar jarfile="${build.dir}/hadoop-${name}-1.0.1.jar"
       manifest="${root}/META-INF/MANIFEST.MF">
    <fileset dir="${build.dir}" includes="classes/ lib/"/>
    <fileset dir="${root}" includes="resources/ plugin.xml"/>
  </jar>
</target>
Finally, run the command ant jar to produce hadoop-eclipse-plugin-1.0.1.jar, which lands under hadoop-1.0.1\build\contrib\eclipse-plugin. Copy it into the plugins directory of your eclipse installation and restart. The resulting plugin works under eclipse Helios SR1; it appears under Window -> Show View -> MapReduce Tools, where it can be configured.
Running MR programs on Hadoop 2.2 from Eclipse - fansy1990 - Linux公社 (Linux系统门户网站)
Environment: hadoop 2.2; windows; myeclipse.
Running an MR program on hadoop from Eclipse really just means an ordinary java program submitting an MR job to the cluster. In Hadoop 1 you only had to point it at the jt (jobtracker) and fs (namenode), typically like this:

Configuration conf = new Configuration();
conf.set("mapred.job.tracker", "192.168.128.138:9001");
conf.set("fs.default.name", "192.168.128.138:9000");
That code works fine on hadoop 1: you really can submit a job to the cluster from plain java. Hadoop 2, however, drops the jobtracker and introduces yarn. How do we drive that? The simplest idea is to set the corresponding configuration and try it:

Configuration conf = new YarnConfiguration();
conf.set("fs.defaultFS", "hdfs://node31:9000");
conf.set("mapreduce.framework.name", "yarn");
conf.set("yarn.resourcemanager.address", "node31:8032");
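For context, a minimal end-to-end driver built around these settings might look as follows. This is only a sketch, not the article's own code: it reuses the TokenizerMapper and IntSumReducer classes from the WordCount listing earlier (assumed to be on the classpath), the node31 addresses are the ones from this article, and the input/output paths are hypothetical placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class RemoteWordCount {
  public static void main(String[] args) throws Exception {
    // Point the client at the remote HDFS and ResourceManager (addresses from this article).
    Configuration conf = new YarnConfiguration();
    conf.set("fs.defaultFS", "hdfs://node31:9000");
    conf.set("mapreduce.framework.name", "yarn");
    conf.set("yarn.resourcemanager.address", "node31:8032");

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(RemoteWordCount.class);
    job.setMapperClass(WordCount.TokenizerMapper.class);   // mapper from the earlier listing
    job.setCombinerClass(WordCount.IntSumReducer.class);
    job.setReducerClass(WordCount.IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path("/user/test/input"));    // hypothetical input dir
    FileOutputFormat.setOutputPath(job, new Path("/user/test/output")); // hypothetical output dir
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

Submitting from the IDE this way still requires the job classes and configuration to be acceptable to the cluster, which is exactly what the rest of this article wrestles with.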
With this configuration it can run, but first you hit the following error:
21:20:21,568 ERROR [main] util.Shell (Shell.java:getWinUtilsPath(303)) - Failed to locate the winutils binary in the hadoop binary path
java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.
    at org.apache.hadoop.util.Shell.getQualifiedBinPath(Shell.java:278)
    at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:300)
    at org.apache.hadoop.util.Shell.<clinit>(Shell.java:293)
    at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:76)
    at org.apache.hadoop.yarn.conf.YarnConfiguration.<clinit>(YarnConfiguration.java:345)
    at org.fansy.hadoop.mr.WordCount.getConf(WordCount.java:104)
    at org.fansy.hadoop.mr.WordCount.runJob(WordCount.java:84)
    at org.fansy.hadoop.mr.WordCount.main(WordCount.java:47)
This error can be ignored; it seems to show up whenever the call is made from windows.
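A common workaround sketch, my addition rather than the article's: the message comes from org.apache.hadoop.util.Shell resolving winutils.exe via the hadoop.home.dir system property (falling back to the HADOOP_HOME environment variable), so pointing that at a local directory that actually contains bin\winutils.exe suppresses it. The D:\hadoop path below is a placeholder.

import org.apache.hadoop.conf.Configuration;

public class WinutilsQuiet {
  public static void main(String[] args) {
    // Must be set before the first hadoop class loads, because Shell
    // resolves winutils.exe in a static initializer.
    System.setProperty("hadoop.home.dir", "D:\\hadoop"); // placeholder dir containing bin\winutils.exe
    Configuration conf = new Configuration();
    System.out.println(conf.get("fs.defaultFS")); // hadoop classes now load without the winutils error
  }
}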
Next come permission problems of one kind or another, which I dealt with by loosening permissions; at least that is how I handled it for now. The permissions to adjust are mainly /tmp plus the wordcount input and output directories. The command is: $HADOOP_HOME/bin/hadoop fs -chmod -R 777 /tmp
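If you would rather do this from code than from the shell, a rough equivalent for the top-level directory might look like the sketch below (my addition, assuming the hdfs://node31:9000 address used above; note it is not recursive the way -chmod -R is).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

public class OpenTmp {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://node31:9000"); // cluster address assumed from this article
    FileSystem fs = FileSystem.get(conf);
    // rwxrwxrwx on /tmp itself; unlike -chmod -R this does not recurse,
    // so loop over fs.listStatus(...) for full recursive behaviour.
    fs.setPermission(new Path("/tmp"), new FsPermission((short) 0777));
    fs.close();
  }
}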
Then, once you see the following error, you can say you are already halfway there:
20:32:36,596 ERROR [main] security.UserGroupInformation (UserGroupInformation.java:doAs(1494)) - PriviledgedActionException as:Administrator (auth:SIMPLE) cause:java.io.IOException: Failed to run job : Application application_1_0001 failed 2 times due to AM Container for appattempt_1_ exited with exitCode: 1 due to: Exception from container-launch: org.apache.hadoop.util.Shell$ExitCodeException: /bin/bash: line 0: fg: no job control
    at org.apache.hadoop.util.Shell.runCommand(Shell.java:464)
    at org.apache.hadoop.util.Shell.run(Shell.java:379)
    at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:589)
    at org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:195)
    at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:283)
    at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:79)
    at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:334)
    at java.util.concurrent.FutureTask.run(FutureTask.java:166)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:724)
.Failing this attempt.. Failing the application.
Searching for this error turns up https://issues.apache.org/jira/browse/MAPREDUCE-5655, and that issue is indeed our solution.
Let's take it in steps 1, 2, 3.
1. Modify the source of MRApps.java and YARNRunner.java, then repackage and replace the corresponding class files inside the original jars. I have already built these two jars; they can be downloaded from http://download.csdn.net/detail/fansy . Replace the corresponding jars on the cluster, and take care to replace the jars imported into Myeclipse as well. (The original post showed a screenshot of the Myeclipse jar list at this point.)
2. Modify mapred-default.xml, adding the property below. (This only needs to be done in the jar imported into eclipse; the modified jar does not need to be uploaded to the cluster.)

<property>
  <name>mapred.remote.os</name>
  <value>Linux</value>
  <description>
    Remote MapReduce framework's OS, can be either Linux or Windows
  </description>
</property>

(As an aside: after adding this property I would expect conf.get("mapred.remote.os") on a freshly created Configuration to return Linux, but it actually returns null, and I am not sure why.)

The file's location inside the jar was shown in a screenshot in the original post.
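One possible explanation, plus a workaround sketch (my addition, not from the article): a plain Configuration only auto-loads core-default.xml and core-site.xml; mapred-default.xml is registered as a default resource only once a class such as JobConf has been loaded, which would account for the null. Setting the property explicitly on the job configuration sidesteps the question entirely:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class RemoteOsCheck {
  public static void main(String[] args) {
    Configuration conf = new YarnConfiguration();
    System.out.println(conf.get("mapred.remote.os")); // likely null: mapred-default.xml not loaded yet
    conf.set("mapred.remote.os", "Linux");            // explicit override on the client side
    System.out.println(conf.get("mapred.remote.os")); // "Linux"
  }
}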
Now run the program again. Well, it can basically be submitted at this point, but it still fails; checking the log shows the following error:

Error: Could not find or load main class org.apache.hadoop.mapreduce.v2.app.MRAppMaster