การเข้าถึง HDFS ด้วย Java: คำแนะนำการกำหนดค่า HDFS ระบบไฟล์แบบกระจายของ Hadoop

ผู้เขียน：Eve Cole เวลาอัปเดต：2025-03-26 19:32:01

ไฟล์กำหนดค่า

ให้แทนที่ M103 ด้วยที่อยู่บริการ HDFS ของคุณ
ในการใช้ไคลเอนต์ Java เพื่อเข้าถึงไฟล์บน HDFS ต้องกล่าวถึงไฟล์การกำหนดค่า hadoop-0.20.2/conf/core-site.xml ก่อน ตอนแรกฉันพลาดอย่างหนักที่ไฟล์นี้ จึงไม่สามารถเชื่อมต่อกับ HDFS ได้เลย และไม่สามารถสร้างหรืออ่านไฟล์ได้

 <?xml version="1.0"?> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> <configuration> <!-- คุณสมบัติทั่วโลก --> <property> <name>hadoop.tmp.dir</name> <description>ไดเรกทอรี</description> </property> <!-- คุณสมบัติของระบบไฟล์ --> <property> <name>fs.default.name</name> <value>hdfs://linux-zzk-113:9000</value> </property> </configuration>

รายการการกำหนดค่า: hadoop.tmp.dir ระบุตำแหน่งไดเรกทอรีที่ใช้เก็บเมทาดาทาบน NameNode ส่วนบน DataNode จะเป็นไดเรกทอรีที่ใช้เก็บข้อมูลไฟล์ของโหนดนั้น

รายการการกำหนดค่า: fs.default.name ระบุที่อยู่ IP และหมายเลขพอร์ตของ NameNode ค่าเริ่มต้นคือ file:/// สำหรับ Java API การเชื่อมต่อกับ HDFS จะต้องใช้ URL ที่กำหนดค่าไว้ที่นี่ ส่วน DataNode ก็เข้าถึง NameNode ผ่าน URL นี้เช่นกัน

hdfs-site.xml

 <? xml version = "1.0" การเข้ารหัส = "UTF-8"?> <!-Autogenerated โดย Cloudera Manager-> <การกำหนดค่า> <property> <name> dfs.namenode.name.dir </name> <value> ไฟล์: /// mnt/sdc1/dfs/nn <name> dfs.namenode.servicerpc-address </name> <value> M103: 8022 </value> </porement> <property> <name> dfs.https.address </name> <alue> M103: 50470 </value> <property> <name> dfs.namenode.http-address </name> <danue> M103: 50070 </alue> </property> <property> <name> dfs.replication </name> <alue> 3 </alue> </property> <property> <name> dfs.client.use.datanode.hostname </name> <value> false </value> </property> <property> <name> fs.permissions.umask-mode </name> <value> 022 </value> </property> <property> <name> dfs.block.local-path-access.user </name> <danue> cloudera-scm </value> </คุณสมบัติ> <property> <name> dfs.client.read.shortcircuit </name> <value>/var/run/hdfs-sockets/dn </value> </คุณสมบัติ> <property> <name> dfs.client.read.shortcircuit.skip.checksum </name> <alue> false </value> </porement> <property> <name> dfs.datanode.hdfs-blocks-metadata.enabled </name> <danue> true </value> </porement> <property> <name> fs.http.impl </name> <value> com.scistor.datavision.fs.httpfilesy

mapred-site.xml

 <? XML เวอร์ชัน = "1.0" การเข้ารหัส = "UTF-8"?> <!-Autogenerated โดย Cloudera Manager-> <การกำหนดค่า> <property> <name> mapReduce.job.split.metainfo.maxsize </name> <value> 120 </value> </perty> <property> <name> mapreduce.output.FileOutputFormat.compress </name> <value> true </value> </porement> <property> <name> mapreduce.Output.FileOutputFormat.compress.type </name> <name> mapreduce.output.fileoutputformat.compress.codec </name> <value> org.apache.hadoop.io.compress.snappycodec </value> </porement> <property> <name> mapreduce.map.utput.compress.codec </name> <name> mapreduce.map.output.compress </name> <value> true </value> </porement> <property> <name> zlib.compress.level </name> <value> default_compression </value> </คุณสมบัติ> <property> <name> <name> mapreduce.map.sort.spill.percent </name> <dance> 0.8 </alues> </porement> <property> <name> mapreduce.reduce.shuffle.parallelcopies </name> <danue> 10 </alue> </property> <property> <name> mapreduce.client.submit.file.replication </name> <danue> 1 </alue> </porement> <property> <name> mapreduce.job.reduces </name> <alues> 24 </value> </porement> <property> <name> <name> mapreduce.map.speculative </name> <danue> false </value> </porement> <property> <name> mapreduce.reduce.speculative </name> <value> false </value> </property> <property> <name> mapreduce.speculative </name> <name> mapreduce.job.reduce.slowstart.completedMaps </name> <dance> 0.8 </alue> </porement> <property> <name> mapreduce.jobhistory.address </name> <value> M103: 10020 </value> </property> <property> <value> M103: 19888 </alues> </perty> <property> <name> mapreduce.jobhistory.webapp.https.address </name> <value> M103: 19890 </alue> </porement> <property> <name> mapreduce.framework.name </name> <value> เส้นด้าย </value> </คุณสมบัติ> <property> <name> yarn.app.mapreduce.am.staging-dir </name> <value>/ผู้ใช้ </value> </property> <property> <name> yarn.app.mapreduce.am.resource.mb </name> <danue> 2048 </value> </คุณสมบัติ> <property> <name> 
yarn.app.mapreduce.am.resource.cpu-vcores </name> </perty> <property> <name> yarn.app.mapreduce.am.command-opts </name> <value> -djava.net.preferipv4stack = true -xmx1717986918 </value> <value> -djava.net.preferipv4stack = true -xmx1717986918 </value> </porement> <property> <name> mapreduce.reduce.java.opts </name> <dange> -djava.net.preferipv4stack = True -xmxmxxmxxmxxmx <name> yarn.app.mapreduce.am.admin.user.env </name> <value> ld_library_path = $ hadoop_common_home/lib/native: $ java_library_path </value> </คุณสมบัติ> <name> mapreduce.map.cpu.vcores </name> <dance> 1 </value> </คุณสมบัติ> <property> <name> mapreduce.dreduce.memory.mb </name> <value> 3072 </value> </property> <property> <name> mapreduce.map.cpu.vcores <name> mapreduce.reduce.cpu.vcores </name> <dance> 1 </alue> </porement> <property> <name> mapreduce.application.classpath </name> <value> $ hadoop_mapred_home/*, $ hadoop_mapred_home/lib/*, $ mr2_classpath, $ cdh_hcat_home/share/hcatalog/*, $ cdh_hive_home/lib/*,/etc/hive/conf <property> <name> mapreduce.admin.user.env </name> <dent> ld_library_path = $ hadoop_common_home/lib/native: $ java_library_path </value> </property> <property> <name>

ใช้ Java API เพื่อเข้าถึงไฟล์และไดเรกทอรี HDFS

 แพ็คเกจ com.demo.hdfs; นำเข้า java.io.bufferedinputstream; นำเข้า Java.io.fileinputstream; นำเข้า java.io.filenotfoundexception; นำเข้า java.io.fileoutputstream; นำเข้า java.io.ioException; org.apache.hadoop.conf.configuration; นำเข้า org.apache.hadoop.fs.fsdatainputstream; นำเข้า org.apache.hadoop.fs.fsdataOutputStream; นำเข้า org.apache.hadoop.fs.filestatus; org.apache.hadoop.io.ioutils; นำเข้า org.apache.hadoop.util.progressable;/*** @author zhangzk**/คลาสสาธารณะ fileCopytohdfs {โมฆะคงที่สาธารณะ // DELETEFROMHDFS (); // getDirectoryfromhdfs (); ภาคผนวก TOHDFS (); readfromhdfs (); } catch (exception e) {// todo บล็อก catch block ที่สร้างอัตโนมัติ e.printstacktrace (); } ในที่สุด {system.out.println ("ความสำเร็จ"); }}/** อัปโหลดไฟล์ไปยัง hdfs*/โมฆะคงที่ส่วนตัว uploadtohdfs () พ่น filenotfoundexception, ioexception {สตริง localsrc = "d: //qq.txt"; สตริง dst = "hdfs: //192.168.0.113: 9000/ผู้ใช้/Zhangzk/qq.txt"; inputStream ใน = ใหม่ bufferedInputStream (ใหม่ FileInputStream (localsRc)); การกำหนดค่า conf = การกำหนดค่าใหม่ (); ระบบไฟล์ fs = filesystem.get (uri.create (dst), conf); outputStream out = fs.create (เส้นทางใหม่ (DST), ใหม่ที่ก้าวหน้าได้ () {ความคืบหน้าเป็นโมฆะสาธารณะ () {system.out.print (".");}}); ioutils.copybytes (ใน, ออก, 4096, จริง); }/** อ่านไฟล์จาก hdfs*/โมฆะคงที่ส่วนตัว readfromhdfs () พ่น filenotfoundexception, ioexception {string dst = "hdfs: //192.168.0.113: 9000/ผู้ใช้/zhangzk/qq.txt"; การกำหนดค่า conf = การกำหนดค่าใหม่ (); ระบบไฟล์ fs = filesystem.get (uri.create (dst), conf); fsdatainputstream hdfsinstream = fs.open (เส้นทางใหม่ (DST)); outputStream out = ใหม่ fileOutputStream ("d: /qq-hdfs.txt"); ไบต์ [] ioBuffer = ไบต์ใหม่ [1024]; int readlen = hdfsinstream.read (ioBuffer); ในขณะที่ (-1! 
= readlen) {out.write (ioBuffer, 0, readlen); readlen = hdfsinstream.read (ioBuffer); } out.close (); hdfsinstream.close (); fs.close (); } /** เพิ่มเนื้อหาในตอนท้ายของไฟล์บน HDFS ในภาคผนวก หมายเหตุ: เมื่ออัปเดตไฟล์คุณต้องเพิ่ม <property> <name> dfs.append.support </name> <dent> true </value> </property>*/โมฆะส่วนตัวแบบคงที่ภาคผนวก appendtohdfs () โยน filenotfoundexception, ioexception {string dst = "hdfs: //192.168.168.168.168.168.168. การกำหนดค่า conf = การกำหนดค่าใหม่ (); ระบบไฟล์ fs = filesystem.get (uri.create (dst), conf); fsDataOutputStream out = fs.append (เส้นทางใหม่ (DST)); int readlen = "Zhangzk เพิ่มโดย HDFS Java API" .getBytes (). ความยาว; ในขณะที่ (-1! = readlen) {out.write ("Zhangzk เพิ่มโดย hdfs java api" .getBytes (), 0, readlen); } out.close (); fs.close (); }/** ลบไฟล์จาก hdfs*/โมฆะคงที่ส่วนตัว deletefromhdfs () พ่น filenotfoundexception, ioexception {string dst = "hdfs: //192.168.0.113: 9000/user/zhangzk/qq-bak.txt"; การกำหนดค่า conf = การกำหนดค่าใหม่ (); ระบบไฟล์ fs = filesystem.get (uri.create (dst), conf); fs.deleteonexit (เส้นทางใหม่ (DST)); fs.close (); }/** การถ่ายโอนไฟล์และไดเรกทอรีบน hdfs*/โมฆะคงที่ส่วนตัว getDirectoryFromhdfs () พ่น filenotfoundexception, ioexception {string dst = "hdfs: //192.168.0.113: 9000/ผู้ใช้/zhangzk"; การกำหนดค่า conf = การกำหนดค่าใหม่ (); ระบบไฟล์ fs = filesystem.get (uri.create (dst), conf); FileStatus fileList [] = fs.ListStatus (เส้นทางใหม่ (DST)); ขนาด int = filelist.length; สำหรับ (int i = 0; i <size; i ++) {system.out.println ("ชื่อ:" + filelist [i] .getPath (). getName () + "/t/tsize:" + fileList [i] .getLen ()); } fs.close (); -

หมายเหตุ: การดำเนินการ append (เขียนต่อท้ายไฟล์) ไม่ได้รับการสนับสนุนตั้งแต่ Hadoop-0.21 เป็นต้นมา สำหรับรายละเอียดของการดำเนินการ append โปรดดูเอกสารบน JavaEye