Operating on HDFS Files with the Java API

Code example:


package com.anthony.hadoop.hdfs;

import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.Progressable;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.net.URI;

/**
 * @Description: Hadoop HDFS Java API tests
 * @Date: Created in 14:01 2018/3/24
 * @Author: Anthony_Duan
 */
public class HDFSAppTest {

    public static final String HDFS_PATH = "hdfs://localhost:8020";
    FileSystem fileSystem = null;
    Configuration configuration = null;


    /**
     * Create a directory
     * @throws Exception
     */
    @Test
    public void mkdir() throws Exception {
        fileSystem.mkdirs(new Path("/hdfsapi/test"));
    }

    /**
     * Create a file
     * @throws Exception
     */
    @Test
    public void create() throws Exception {
        FSDataOutputStream output = fileSystem.create(new Path("/hdfsapi/test/a.txt"));
        output.write("hello hadoop".getBytes());
        output.flush();
        output.close();
    }

    /**
     * Print the contents of an HDFS file
     * @throws Exception
     */
    @Test
    public void cat() throws Exception {
        FSDataInputStream in = fileSystem.open(new Path("/hdfsapi/test/a.txt"));
        IOUtils.copyBytes(in, System.out, 1024);
        in.close();
    }

    /**
     * Rename a file
     */
    @Test
    public void rename() throws Exception {
        Path oldPath = new Path("/hdfsapi/test/a.txt");
        Path newPath = new Path("/hdfsapi/test/b.txt");
        fileSystem.rename(oldPath, newPath);
    }

    /**
     * Upload a local file to HDFS
     * @throws Exception
     */
    @Test
    public void copyFromLocalFile() throws Exception {
        Path localPath = new Path("/Users/duanjiaxing/myhexo/db.json");
        Path hdfsPath = new Path("/hdfsapi/test");
        fileSystem.copyFromLocalFile(localPath, hdfsPath);
    }

    /**
     * Upload with progress reporting
     * @throws Exception
     */
    @Test
    public void copyFromLocalFileWithProgress() throws Exception {
        InputStream in = new BufferedInputStream(
                new FileInputStream(
                        new File("/Users/duanjiaxing/software/kafka_2.11-0.9.0.0.tgz")
                )
        );
        FSDataOutputStream output = fileSystem.create(new Path("/hdfsapi/test/software"),
                new Progressable() {
                    public void progress() {
                        System.out.print(".");
                    }
                }
        );
        // Copy and close both streams when finished.
        IOUtils.copyBytes(in, output, 4096, true);
    }

    /**
     * Download an HDFS file to the local file system
     * @throws Exception
     */
    @Test
    public void copyToLocalFile() throws Exception {
        Path localPath = new Path("/Users/duanjiaxing/Desktop");
        Path hdfsPath = new Path("/hdfsapi/test/software/kafka_2.11-0.9.0.0.tgz");
        fileSystem.copyToLocalFile(hdfsPath, localPath);
    }

    /**
     * List all files under a directory
     * @throws Exception
     */
    @Test
    public void listFile() throws Exception {
        FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/"));
        for (FileStatus fileStatus : fileStatuses) {
            String isDir = fileStatus.isDirectory() ? "directory" : "file";
            short replication = fileStatus.getReplication(); // replication factor
            long len = fileStatus.getLen();                  // file size in bytes
            String path = fileStatus.getPath().toString();   // full path

            System.out.println(isDir + "\t" + replication + "\t" + len + "\t" + path);
        }
    }

    /**
     * Delete a path recursively
     * @throws Exception
     */
    @Test
    public void delete() throws Exception {
        fileSystem.delete(new Path("/hdfsapi/test/software/kafka_2.11-0.9.0.0.tgz"), true);
    }

    @Before
    public void setUp() throws Exception {
        System.out.println("HDFSAppTest.setUp");
        configuration = new Configuration();
        // A user name can be passed in here. I am using Hadoop on my own Mac, so the
        // local and HDFS user names match; if they differ, pass the user name as well.
        fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration);
    }

    @After
    public void tearDown() throws Exception {
        configuration = null;
        fileSystem = null;
        System.out.println("HDFSAppTest.tearDown");
    }

}
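
As noted in the setUp comment above, FileSystem.get can also take a user name when the local account does not match the HDFS user. Below is a minimal sketch of that variant; the user name "hadoop" and the class name HDFSUserDemo are placeholders, not values from this article.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

public class HDFSUserDemo {
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        // Connect to HDFS as an explicit user. "hadoop" is only a placeholder;
        // use the account that owns the target HDFS directories.
        FileSystem fileSystem = FileSystem.get(
                new URI("hdfs://localhost:8020"), configuration, "hadoop");
        // Quick sanity check against a path used in the tests above.
        System.out.println(fileSystem.exists(new Path("/hdfsapi/test")));
        fileSystem.close();
    }
}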
A note on the replication factor reported by listFile:
  1. If a file is put into HDFS through the hdfs shell, the default replication factor from the configuration file is used; here the configured replication factor is 1.
  2. If a file is uploaded through the Java API and no replication factor is set manually on the client side, Hadoop's own client default of 3 is used.
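
To make files written through the Java API follow the cluster's factor of 1 rather than the client default of 3, the replication factor can be set explicitly on the client side. A minimal sketch, assuming the same local HDFS address as above; the class name and target path are just examples:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

public class ReplicationDemo {
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        // Override the client-side default so new files get 1 replica instead of 3.
        configuration.set("dfs.replication", "1");
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://localhost:8020"), configuration);

        FSDataOutputStream output = fileSystem.create(new Path("/hdfsapi/test/replication.txt"));
        output.write("replication demo".getBytes());
        output.close();

        // Alternatively, adjust the replication factor of an existing file.
        fileSystem.setReplication(new Path("/hdfsapi/test/replication.txt"), (short) 1);
        fileSystem.close();
    }
}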